1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 __doc__ = \
26 """
27 pyparsing module - Classes and methods to define and execute parsing grammars
28
29 The pyparsing module is an alternative approach to creating and executing simple grammars,
30 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
31 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
32 provides a library of classes that you use to construct the grammar directly in Python.
33
34 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
35
36 from pyparsing import Word, alphas
37
38 # define grammar of a greeting
39 greet = Word( alphas ) + "," + Word( alphas ) + "!"
40
41 hello = "Hello, World!"
42 print (hello, "->", greet.parseString( hello ))
43
44 The program outputs the following::
45
46 Hello, World! -> ['Hello', ',', 'World', '!']
47
48 The Python representation of the grammar is quite readable, owing to the self-explanatory
49 class names, and the use of '+', '|' and '^' operators.
50
51 The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
52 object with named attributes.
53
54 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
55 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
56 - quoted strings
57 - embedded comments
58 """
59
60 __version__ = "2.1.3"
61 __versionTime__ = "11 May 2016 15:17 UTC"
62 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
63
64 import string
65 from weakref import ref as wkref
66 import copy
67 import sys
68 import warnings
69 import re
70 import sre_constants
71 import collections
72 import pprint
73 import functools
74 import itertools
75 import traceback
76
77
78
79 __all__ = [
80 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
81 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
82 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
83 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
84 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
85 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
86 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
87 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
88 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
89 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
90 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
91 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
92 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
93 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
94 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
95 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
96 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
97 ]
98
99 PY_3 = sys.version.startswith('3')
100 if PY_3:
101 _MAX_INT = sys.maxsize
102 basestring = str
103 unichr = chr
104 _ustr = str
105
106
107 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
108
109 else:
110 _MAX_INT = sys.maxint
111 range = xrange
114 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
115 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
116 then < returns the unicode object | encodes it with the default encoding | ... >.
117 """
118 if isinstance(obj,unicode):
119 return obj
120
121 try:
122
123
124 return str(obj)
125
126 except UnicodeEncodeError:
127
128 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
129 xmlcharref = Regex('&#\d+;')
130 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
131 return xmlcharref.transformString(ret)
132
133
134 singleArgBuiltins = []
135 import __builtin__
136 for fname in "sum len sorted reversed list tuple set any all min max".split():
137 try:
138 singleArgBuiltins.append(getattr(__builtin__,fname))
139 except AttributeError:
140 continue
141
142 _generatorType = type((y for y in range(1)))
145 """Escape &, <, >, ", ', etc. in a string of data."""
146
147
148 from_symbols = '&><"\''
149 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
150 for from_,to_ in zip(from_symbols, to_symbols):
151 data = data.replace(from_, to_)
152 return data
153
156
# Character-set constants shared by the token classes in this module.
nums = string.digits
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
alphanums = alphas + nums
hexnums = nums + "ABCDEF" + "abcdef"
_bslash = "\\"
# every printable character that is not whitespace
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
165 """base exception class for all parsing runtime exceptions"""
166
167
def __init__( self, pstr, loc=0, msg=None, elem=None ):
    """Store the parse string, location, message and originating element.

    When C{msg} is omitted, the single string argument C{pstr} is taken to
    be the message itself and the stored parse string is left empty.
    """
    self.msg, self.pstr = (pstr, "") if msg is None else (msg, pstr)
    self.loc = loc
    self.parserElement = elem
177
179 """supported attributes by name are:
180 - lineno - returns the line number of the exception text
181 - col - returns the column number of the exception text
182 - line - returns the line containing the exception text
183 """
184 if( aname == "lineno" ):
185 return lineno( self.loc, self.pstr )
186 elif( aname in ("col", "column") ):
187 return col( self.loc, self.pstr )
188 elif( aname == "line" ):
189 return line( self.loc, self.pstr )
190 else:
191 raise AttributeError(aname)
192
194 return "%s (at char %d), (line:%d, col:%d)" % \
195 ( self.msg, self.loc, self.lineno, self.column )
209 return "lineno col line".split() + dir(type(self))
210
212 """exception thrown when parse expressions don't match class;
213 supported attributes by name are:
214 - lineno - returns the line number of the exception text
215 - col - returns the column number of the exception text
216 - line - returns the line containing the exception text
217 """
218 pass
219
221 """user-throwable exception thrown when inconsistent parse content
222 is found; stops all parsing immediately"""
223 pass
224
226 """just like C{L{ParseFatalException}}, but thrown internally when an
227 C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
228 an unbacktrackable syntax error has been found"""
232
247 """exception thrown by C{validate()} if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
    """Keep the given list of parse elements as C{parseElementTrace}."""
    trace = parseElementList
    self.parseElementTrace = trace
250
252 return "RecursiveGrammarException: %s" % self.parseElementTrace
253
260 return repr(self.tup)
262 self.tup = (self.tup[0],i)
263
265 """Structured parse results, to provide multiple means of access to the parsed data:
266 - as a list (C{len(results)})
267 - by list index (C{results[0], results[1]}, etc.)
268 - by attribute (C{results.<resultsName>})
269 """
def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
    """Return C{toklist} itself when it is already an instance of C{cls};
    otherwise allocate a fresh object flagged as still needing initialization.
    """
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        fresh.__doinit = True
        return fresh
    return toklist
276
277
278
# Initialize the token list and, when a results name is supplied, register the
# tokens under that name in the named-results dictionary.
# NOTE(review): isinstance is bound as a default argument, presumably to make it
# a fast local lookup - confirm before touching the signature.
279 - def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
# The __doinit flag is cleared on first use, so the setup below runs only once
# for a given object.
280 if self.__doinit:
281 self.__doinit = False
282 self.__name = None
283 self.__parent = None
284 self.__accumNames = {}
285 self.__asList = asList
286 self.__modal = modal
# Normalize toklist: None becomes an empty list, a list is copied, a generator
# is materialized, and any other value is wrapped in a one-element list.
287 if toklist is None:
288 toklist = []
289 if isinstance(toklist, list):
290 self.__toklist = toklist[:]
291 elif isinstance(toklist, _generatorType):
292 self.__toklist = list(toklist)
293 else:
294 self.__toklist = [toklist]
295 self.__tokdict = dict()
296
# Results-name handling: only performed for a non-empty name.
297 if name is not None and name:
# Non-modal names are recorded in __accumNames.
298 if not modal:
299 self.__accumNames[name] = 0
# Integer names are converted to their string form.
300 if isinstance(name,int):
301 name = _ustr(name)
302 self.__name = name
# Nothing is stored under the name when toklist is None, '' or [].
303 if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
304 if isinstance(toklist,basestring):
305 toklist = [ toklist ]
# asList: store a ParseResults (a copy of toklist, or one built from its first
# element) at offset 0 and tag the stored value with the name.
306 if asList:
307 if isinstance(toklist,ParseResults):
308 self[name] = _ParseResultsWithOffset(toklist.copy(),0)
309 else:
310 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
311 self[name].__name = name
312 else:
# Otherwise store the first element, falling back to toklist itself when it
# cannot be indexed.
313 try:
314 self[name] = toklist[0]
315 except (KeyError,TypeError,IndexError):
316 self[name] = toklist
317
319 if isinstance( i, (int,slice) ):
320 return self.__toklist[i]
321 else:
322 if i not in self.__accumNames:
323 return self.__tokdict[i][-1][0]
324 else:
325 return ParseResults([ v[0] for v in self.__tokdict[i] ])
326
328 if isinstance(v,_ParseResultsWithOffset):
329 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
330 sub = v[0]
331 elif isinstance(k,(int,slice)):
332 self.__toklist[k] = v
333 sub = v
334 else:
335 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
336 sub = v
337 if isinstance(sub,ParseResults):
338 sub.__parent = wkref(self)
339
341 if isinstance(i,(int,slice)):
342 mylen = len( self.__toklist )
343 del self.__toklist[i]
344
345
346 if isinstance(i, int):
347 if i < 0:
348 i += mylen
349 i = slice(i, i+1)
350
351 removed = list(range(*i.indices(mylen)))
352 removed.reverse()
353
354
355
356
357
358
359 for name,occurrences in self.__tokdict.items():
360 for j in removed:
361 for k, (value, position) in enumerate(occurrences):
362 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
363 else:
364 del self.__tokdict[i]
365
367 return k in self.__tokdict
368
def __len__( self ):
    """Number of tokens in the token list."""
    return len( self.__toklist )

def __bool__(self):
    """True when the token list is non-empty."""
    return bool( self.__toklist )
__nonzero__ = __bool__

def __iter__( self ):
    """Iterate over the tokens in order."""
    return iter( self.__toklist )

def __reversed__( self ):
    """Iterate over a reversed copy of the token list."""
    backwards = self.__toklist[::-1]
    return iter( backwards )
375 """Returns all named result keys."""
376 if hasattr(self.__tokdict, "iterkeys"):
377 return self.__tokdict.iterkeys()
378 else:
379 return iter(self.__tokdict)
380
382 """Returns all named result values."""
383 return (self[k] for k in self.iterkeys())
384
386 return ((k, self[k]) for k in self.iterkeys())
387
388 if PY_3:
389 keys = iterkeys
390 values = itervalues
391 items = iteritems
392 else:
394 """Returns all named result keys."""
395 return list(self.iterkeys())
396
398 """Returns all named result values."""
399 return list(self.itervalues())
400
402 """Returns all named result keys and values as a list of tuples."""
403 return list(self.iteritems())
404
406 """Since keys() returns an iterator, this method is helpful in bypassing
407 code that looks for the existence of any defined results names."""
408 return bool(self.__tokdict)
409
def pop( self, *args, **kwargs):
    """Remove and return an item, using list or dict semantics.

    With no argument or an integer argument, the token at that index
    (default: the last one) is removed and returned. With a non-integer
    argument (most likely a string), the value stored under that key is
    removed and returned. A second positional argument, or the keyword
    argument C{default}, gives the value to return when the key is not
    present, just as in C{dict.pop()}. Any other keyword argument raises
    C{TypeError}.
    """
    params = list(args) if args else [-1]
    for kw, val in kwargs.items():
        if kw != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw)
        params = [params[0], val]

    key = params[0]
    if isinstance(key, int) or len(params) == 1 or key in self:
        popped = self[key]
        del self[key]
        return popped
    # key is absent and a fallback value was supplied
    return params[1]
436
def get(self, key, defaultValue=None):
    """Look up the named result C{key}; when no such name is present,
    return C{defaultValue} (C{None} unless specified)."""
    return self[key] if key in self else defaultValue
445
def insert( self, index, insStr ):
    """Insert a new element at position C{index} in the list of parsed tokens."""
    self.__toklist.insert(index, insStr)
    # shift the recorded offset of every named result located past the
    # insertion point
    for occurrences in self.__tokdict.values():
        for slot, (value, position) in enumerate(occurrences):
            shifted = position + (position > index)
            occurrences[slot] = _ParseResultsWithOffset(value, shifted)
457
459 """Add single element to end of ParseResults list of elements."""
460 self.__toklist.append(item)
461
463 """Add sequence of elements to end of ParseResults list of elements."""
464 if isinstance(itemseq, ParseResults):
465 self += itemseq
466 else:
467 self.__toklist.extend(itemseq)
468
470 """Clear all elements and results names."""
471 del self.__toklist[:]
472 self.__tokdict.clear()
473
475 try:
476 return self[name]
477 except KeyError:
478 return ""
479
480 if name in self.__tokdict:
481 if name not in self.__accumNames:
482 return self.__tokdict[name][-1][0]
483 else:
484 return ParseResults([ v[0] for v in self.__tokdict[name] ])
485 else:
486 return ""
487
489 ret = self.copy()
490 ret += other
491 return ret
492
494 if other.__tokdict:
495 offset = len(self.__toklist)
496 addoffset = lambda a: offset if a<0 else a+offset
497 otheritems = other.__tokdict.items()
498 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
499 for (k,vlist) in otheritems for v in vlist]
500 for k,v in otherdictitems:
501 self[k] = v
502 if isinstance(v[0],ParseResults):
503 v[0].__parent = wkref(self)
504
505 self.__toklist += other.__toklist
506 self.__accumNames.update( other.__accumNames )
507 return self
508
510 if isinstance(other,int) and other == 0:
511
512 return self.copy()
513 else:
514
515 return other + self
516
518 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
519
521 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
522
524 out = []
525 for item in self.__toklist:
526 if out and sep:
527 out.append(sep)
528 if isinstance( item, ParseResults ):
529 out += item._asStringList()
530 else:
531 out.append( _ustr(item) )
532 return out
533
535 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
536 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
537
539 """Returns the named parse results as a nested dictionary."""
540 if PY_3:
541 item_fn = self.items
542 else:
543 item_fn = self.iteritems
544
545 def toItem(obj):
546 if isinstance(obj, ParseResults):
547 if obj.haskeys():
548 return obj.asDict()
549 else:
550 return [toItem(v) for v in obj]
551 else:
552 return obj
553
554 return dict((k,toItem(v)) for k,v in item_fn())
555
557 """Returns a new copy of a C{ParseResults} object."""
558 ret = ParseResults( self.__toklist )
559 ret.__tokdict = self.__tokdict.copy()
560 ret.__parent = self.__parent
561 ret.__accumNames.update( self.__accumNames )
562 ret.__name = self.__name
563 return ret
564
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """Render the parse results as XML.

    Tags are created for tokens and lists that have defined results names;
    unnamed items are tagged C{ITEM} unless C{namedItemsOnly} is set, in
    which case they are omitted. C{doctag} overrides the tag used for this
    level. With C{formatted} false, no newlines or indentation are emitted.
    """
    # token position -> results name
    namesByPos = {}
    for key, occurrences in self.__tokdict.items():
        for occ in occurrences:
            namesByPos[occ[1]] = key

    if formatted:
        nl = "\n"
        childIndent = indent + " "
    else:
        # collapse out indents
        nl = indent = childIndent = ""

    selfTag = doctag if doctag is not None else (self.__name or None)
    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    childNamedOnly = namedItemsOnly and doctag is None
    parts = [ nl, indent, "<", selfTag, ">" ]

    for pos, tok in enumerate(self.__toklist):
        tag = namesByPos.get(pos)
        if isinstance(tok, ParseResults):
            # nested results render themselves one indent level deeper
            parts.append(tok.asXML(tag, childNamedOnly, childIndent, formatted))
            continue
        # individual token: use its name when it has one
        if not tag:
            if namedItemsOnly:
                continue
            tag = "ITEM"
        parts.extend([ nl, childIndent, "<", tag, ">",
                       _xml_escape(_ustr(tok)),
                       "</", tag, ">" ])

    parts.extend([ nl, indent, "</", selfTag, ">" ])
    return "".join(parts)
623
625 for k,vlist in self.__tokdict.items():
626 for v,loc in vlist:
627 if sub is v:
628 return k
629 return None
630
632 """Returns the results name for this token expression."""
633 if self.__name:
634 return self.__name
635 elif self.__parent:
636 par = self.__parent()
637 if par:
638 return par.__lookup(self)
639 else:
640 return None
641 elif (len(self) == 1 and
642 len(self.__tokdict) == 1 and
643 self.__tokdict.values()[0][0][1] in (0,-1)):
644 return self.__tokdict.keys()[0]
645 else:
646 return None
647
def dump(self,indent='',depth=0):
    """Diagnostic listing of the contents of a C{ParseResults}.

    The token list is shown first, followed by one line per results name
    (sorted); when there are no results names, nested C{ParseResults} are
    listed by index instead. The optional C{indent} string prefixes the
    output so it can be embedded in a nested display of other data.
    """
    pieces = [ indent + _ustr(self.asList()) ]
    if self.haskeys():
        for key, val in sorted(self.items()):
            pieces.append('\n')
            pieces.append( "%s%s- %s: " % (indent,(' '*depth), key) )
            if isinstance(val, ParseResults) and val:
                pieces.append( val.dump(indent, depth+1) )
            else:
                pieces.append( _ustr(val) )
    elif any(isinstance(sub, ParseResults) for sub in self):
        for idx, sub in enumerate(self):
            if isinstance(sub, ParseResults):
                shown = sub.dump(indent, depth+1)
            else:
                shown = _ustr(sub)
            pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),idx,indent,(' '*(depth+1)),shown))

    return "".join(pieces)
677
def pprint(self, *args, **kwargs):
    """Pretty-print the parsed results as a nested list via the C{pprint} module.
    Any additional positional or keyword arguments are forwarded unchanged to
    C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
    nested = self.asList()
    pprint.pprint(nested, *args, **kwargs)
683
684
686 return ( self.__toklist,
687 ( self.__tokdict.copy(),
688 self.__parent is not None and self.__parent() or None,
689 self.__accumNames,
690 self.__name ) )
691
693 self.__toklist = state[0]
694 (self.__tokdict,
695 par,
696 inAccumNames,
697 self.__name) = state[1]
698 self.__accumNames = {}
699 self.__accumNames.update(inAccumNames)
700 if par is not None:
701 self.__parent = wkref(par)
702 else:
703 self.__parent = None
704
706 return self.__toklist, self.__name, self.__asList, self.__modal
707
709 return (dir(type(self)) + list(self.keys()))
710
# Register ParseResults as a virtual MutableMapping. The ABC aliases in the
# top-level ``collections`` module were removed in Python 3.10, so look in
# ``collections.abc`` first and fall back to ``collections`` on Python 2.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
712
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    # a location sitting on a newline character is reported as column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # otherwise measure the distance back to the preceding newline
    # (rfind gives -1 when there is none)
    return loc - strg.rfind("\n", 0, loc)
725
727 """Returns current line number within a string, counting newlines as line separators.
728 The first line is number 1.
729
730 Note: the default parsing behavior is to expand tabs in the input string
731 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
732 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
733 consistent view of the parsed string, the parse location, and line and column
734 positions within the parsed string.
735 """
736 return strg.count("\n",0,loc) + 1
737
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    # no newline at or after loc: the line runs to the end of the string
    return strg[start:] if end < 0 else strg[start:end]
747
749 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
750
752 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
753
755 print ("Exception raised:" + _ustr(exc))
756
758 """'Do-nothing' debug action, to suppress debugging output during parsing."""
759 pass
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783 'decorator to trim function calls to match the arity of the target'
785 if func in singleArgBuiltins:
786 return lambda s,l,t: func(t)
787 limit = [0]
788 foundArity = [False]
789
790
791 ver = tuple(sys.version_info)[:3]
792 if ver[:2] >= (3,5):
793 def extract_stack():
794
795 offset = -3 if ver == (3,5,0) else -2
796 frame_summary = traceback.extract_stack()[offset]
797 return [(frame_summary.filename, frame_summary.lineno)]
798 def extract_tb(tb):
799 frames = traceback.extract_tb(tb)
800 frame_summary = frames[-1]
801 return [(frame_summary.filename, frame_summary.lineno)]
802 else:
803 extract_stack = traceback.extract_stack
804 extract_tb = traceback.extract_tb
805
806
807
808
809 LINE_DIFF = 6
810
811
812 this_line = extract_stack()[-1]
813 pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)
814
815 def wrapper(*args):
816 while 1:
817 try:
818 ret = func(*args[limit[0]:])
819 foundArity[0] = True
820 return ret
821 except TypeError:
822
823 if foundArity[0]:
824 raise
825 else:
826 try:
827 tb = sys.exc_info()[-1]
828 if not extract_tb(tb)[-1][:2] == pa_call_line_synth:
829 raise
830 finally:
831 del tb
832
833 if limit[0] <= maxargs:
834 limit[0] += 1
835 continue
836 raise
837 return wrapper
838
840 """Abstract base level parser element class."""
841 DEFAULT_WHITE_CHARS = " \n\t\r"
842 verbose_stacktrace = False
843
844 @staticmethod
849
850 @staticmethod
852 """
853 Set class to be used for inclusion of string literals into a parser.
854 """
855 ParserElement.literalStringClass = cls
856
858 self.parseAction = list()
859 self.failAction = None
860
861 self.strRepr = None
862 self.resultsName = None
863 self.saveAsList = savelist
864 self.skipWhitespace = True
865 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
866 self.copyDefaultWhiteChars = True
867 self.mayReturnEmpty = False
868 self.keepTabs = False
869 self.ignoreExprs = list()
870 self.debug = False
871 self.streamlined = False
872 self.mayIndexError = True
873 self.errmsg = ""
874 self.modalResults = True
875 self.debugActions = ( None, None, None )
876 self.re = None
877 self.callPreparse = True
878 self.callDuringTry = False
879
881 """Make a copy of this C{ParserElement}. Useful for defining different parse actions
882 for the same parsing pattern, using copies of the original parse element."""
883 cpy = copy.copy( self )
884 cpy.parseAction = self.parseAction[:]
885 cpy.ignoreExprs = self.ignoreExprs[:]
886 if self.copyDefaultWhiteChars:
887 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
888 return cpy
889
891 """Define name for this expression, for use in debugging."""
892 self.name = name
893 self.errmsg = "Expected " + self.name
894 if hasattr(self,"exception"):
895 self.exception.msg = self.errmsg
896 return self
897
899 """Define name for referencing matching tokens as a nested attribute
900 of the returned parse results.
901 NOTE: this returns a *copy* of the original C{ParserElement} object;
902 this is so that the client can define a basic element, such as an
903 integer, and reference it in multiple places with different names.
904
905 You can also set results names using the abbreviated syntax,
906 C{expr("name")} in place of C{expr.setResultsName("name")} -
907 see L{I{__call__}<__call__>}.
908 """
909 newself = self.copy()
910 if name.endswith("*"):
911 name = name[:-1]
912 listAllMatches=True
913 newself.resultsName = name
914 newself.modalResults = not listAllMatches
915 return newself
916
918 """Method to invoke the Python pdb debugger when this element is
919 about to be parsed. Set C{breakFlag} to True to enable, False to
920 disable.
921 """
922 if breakFlag:
923 _parseMethod = self._parse
924 def breaker(instring, loc, doActions=True, callPreParse=True):
925 import pdb
926 pdb.set_trace()
927 return _parseMethod( instring, loc, doActions, callPreParse )
928 breaker._originalParseMethod = _parseMethod
929 self._parse = breaker
930 else:
931 if hasattr(self._parse,"_originalParseMethod"):
932 self._parse = self._parse._originalParseMethod
933 return self
934
936 """Define action to perform when successfully matching parse element definition.
937 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
938 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
939 - s = the original string being parsed (see note below)
940 - loc = the location of the matching substring
941 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
942 If the functions in fns modify the tokens, they can return them as the return
943 value from fn, and the modified list of tokens will replace the original.
944 Otherwise, fn does not need to return any value.
945
946 Note: the default parsing behavior is to expand tabs in the input string
947 before starting the parsing process. See L{I{parseString}<parseString>} for more information
948 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
949 consistent view of the parsed string, the parse location, and line and column
950 positions within the parsed string.
951 """
952 self.parseAction = list(map(_trim_arity, list(fns)))
953 self.callDuringTry = kwargs.get("callDuringTry", False)
954 return self
955
957 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
958 self.parseAction += list(map(_trim_arity, list(fns)))
959 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
960 return self
961
963 """Add a boolean predicate function to expression's list of parse actions. See
964 L{I{setParseAction}<setParseAction>}. Optional keyword argument C{message} can
965 be used to define a custom message to be used in the raised exception."""
966 msg = kwargs.get("message") or "failed user-defined condition"
967 for fn in fns:
968 def pa(s,l,t):
969 if not bool(_trim_arity(fn)(s,l,t)):
970 raise ParseException(s,l,msg)
971 return t
972 self.parseAction.append(pa)
973 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
974 return self
975
977 """Define action to perform if parsing fails at this expression.
978 Fail action fn is a callable function that takes the arguments
979 C{fn(s,loc,expr,err)} where:
980 - s = string being parsed
981 - loc = location where expression match was attempted and failed
982 - expr = the parse expression that failed
983 - err = the exception thrown
984 The function returns no value. It may throw C{L{ParseFatalException}}
985 if it is desired to stop parsing immediately."""
986 self.failAction = fn
987 return self
988
990 exprsFound = True
991 while exprsFound:
992 exprsFound = False
993 for e in self.ignoreExprs:
994 try:
995 while 1:
996 loc,dummy = e._parse( instring, loc )
997 exprsFound = True
998 except ParseException:
999 pass
1000 return loc
1001
1003 if self.ignoreExprs:
1004 loc = self._skipIgnorables( instring, loc )
1005
1006 if self.skipWhitespace:
1007 wt = self.whiteChars
1008 instrlen = len(instring)
1009 while loc < instrlen and instring[loc] in wt:
1010 loc += 1
1011
1012 return loc
1013
1014 - def parseImpl( self, instring, loc, doActions=True ):
1016
1017 - def postParse( self, instring, loc, tokenlist ):
1019
1020
# Uncached parse step for this element: optionally pre-parse, call parseImpl,
# run postParse, wrap the tokens in a ParseResults, apply any parse actions,
# and return (loc, retTokens).
1021 - def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
1022 debugging = ( self.debug )
1023
# Path taken when debugging is on or a fail action is set: failures are
# reported before being re-raised.
1024 if debugging or self.failAction:
1025
# debugActions[0], when set, is called before the match is attempted.
1026 if (self.debugActions[0] ):
1027 self.debugActions[0]( instring, loc, self )
1028 if callPreParse and self.callPreparse:
1029 preloc = self.preParse( instring, loc )
1030 else:
1031 preloc = loc
# tokensStart is the location later handed to the parse actions and the
# debug/fail callbacks.
1032 tokensStart = preloc
1033 try:
1034 try:
1035 loc,tokens = self.parseImpl( instring, preloc, doActions )
# An IndexError from parseImpl is converted into a ParseException located at
# len(instring).
1036 except IndexError:
1037 raise ParseException( instring, len(instring), self.errmsg, self )
1038 except ParseBaseException as err:
1039
# Notify debugActions[2] and failAction (each only when set), then re-raise.
1040 if self.debugActions[2]:
1041 self.debugActions[2]( instring, tokensStart, self, err )
1042 if self.failAction:
1043 self.failAction( instring, tokensStart, self, err )
1044 raise
# Path without debugging or a fail action.
1045 else:
1046 if callPreParse and self.callPreparse:
1047 preloc = self.preParse( instring, loc )
1048 else:
1049 preloc = loc
1050 tokensStart = preloc
# The IndexError guard is used only when the element is flagged mayIndexError
# or loc is at/after the end of the input; otherwise parseImpl is called bare.
1051 if self.mayIndexError or loc >= len(instring):
1052 try:
1053 loc,tokens = self.parseImpl( instring, preloc, doActions )
1054 except IndexError:
1055 raise ParseException( instring, len(instring), self.errmsg, self )
1056 else:
1057 loc,tokens = self.parseImpl( instring, preloc, doActions )
1058
1059 tokens = self.postParse( instring, loc, tokens )
1060
1061 retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
# Parse actions run when actions are enabled for this call, or when the
# element is marked callDuringTry.
1062 if self.parseAction and (doActions or self.callDuringTry):
1063 if debugging:
1064 try:
# Each action receives the current retTokens; a non-None return value is
# re-wrapped and replaces retTokens for the following actions.
1065 for fn in self.parseAction:
1066 tokens = fn( instring, tokensStart, retTokens )
1067 if tokens is not None:
1068 retTokens = ParseResults( tokens,
1069 self.resultsName,
1070 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
1071 modal=self.modalResults )
1072 except ParseBaseException as err:
1073
# While debugging, a parse exception raised by an action is reported to
# debugActions[2] (when set) before being re-raised.
1074 if (self.debugActions[2] ):
1075 self.debugActions[2]( instring, tokensStart, self, err )
1076 raise
1077 else:
# Same action loop without the exception reporting.
1078 for fn in self.parseAction:
1079 tokens = fn( instring, tokensStart, retTokens )
1080 if tokens is not None:
1081 retTokens = ParseResults( tokens,
1082 self.resultsName,
1083 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
1084 modal=self.modalResults )
1085
1086 if debugging:
1087
# debugActions[1], when set, is called with the start location, end location
# and resulting tokens.
1088 if (self.debugActions[1] ):
1089 self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
1090
1091 return loc, retTokens
1092
1098
1100 try:
1101 self.tryParse(instring, loc)
1102 except (ParseException, IndexError):
1103 return False
1104 else:
1105 return True
1106
1107
1108
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end for C{_parseNoCache}.

    Results are cached in C{ParserElement._exprArgCache} under the key
    C{(self, instring, loc, callPreParse, doActions)}. A cached exception is
    re-raised; a cached success is returned with a copy of its tokens.
    """
    key = (self, instring, loc, callPreParse, doActions)
    if key in ParserElement._exprArgCache:
        cached = ParserElement._exprArgCache[key]
        if isinstance(cached, Exception):
            raise cached
        endloc, toks = cached[0], cached[1]
        return (endloc, toks.copy())

    try:
        result = self._parseNoCache(instring, loc, doActions, callPreParse)
        # store a copy of the tokens; the caller receives the original result
        ParserElement._exprArgCache[key] = (result[0], result[1].copy())
        return result
    except ParseBaseException as err:
        # clear the traceback before the exception object is kept in the cache
        err.__traceback__ = None
        ParserElement._exprArgCache[key] = err
        raise
1125
1126 _parse = _parseNoCache
1127
1128
1129 _exprArgCache = {}
1130 @staticmethod
1133
1134 _packratEnabled = False
1135 @staticmethod
1137 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1138 Repeated parse attempts at the same string location (which happens
1139 often in many complex grammars) can immediately return a cached value,
1140 instead of re-executing parsing/validating code. Memoizing is done of
1141 both valid results and parsing exceptions.
1142
1143 This speedup may break existing programs that use parse actions that
1144 have side-effects. For this reason, packrat parsing is disabled when
1145 you first import pyparsing. To activate the packrat feature, your
1146 program must call the class method C{ParserElement.enablePackrat()}. If
1147 your program uses C{psyco} to "compile as you go", you must call
1148 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
1149 Python will crash. For best results, call C{enablePackrat()} immediately
1150 after importing pyparsing.
1151 """
1152 if not ParserElement._packratEnabled:
1153 ParserElement._packratEnabled = True
1154 ParserElement._parse = ParserElement._parseCache
1155
1157 """Execute the parse expression with the given string.
1158 This is the main interface to the client code, once the complete
1159 expression has been built.
1160
1161 If you want the grammar to require that the entire input string be
1162 successfully parsed, then set C{parseAll} to True (equivalent to ending
1163 the grammar with C{L{StringEnd()}}).
1164
1165 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1166 in order to report proper column numbers in parse actions.
1167 If the input string contains tabs and
1168 the grammar uses parse actions that use the C{loc} argument to index into the
1169 string being parsed, you can ensure you have a consistent view of the input
1170 string by:
1171 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1172 (see L{I{parseWithTabs}<parseWithTabs>})
1173 - define your parse action using the full C{(s,loc,toks)} signature, and
1174 reference the input string using the parse action's C{s} argument
1175 - explicitly expand the tabs in your input string before calling
1176 C{parseString}
1177 """
1178 ParserElement.resetCache()
1179 if not self.streamlined:
1180 self.streamline()
1181
1182 for e in self.ignoreExprs:
1183 e.streamline()
1184 if not self.keepTabs:
1185 instring = instring.expandtabs()
1186 try:
1187 loc, tokens = self._parse( instring, 0 )
1188 if parseAll:
1189 loc = self.preParse( instring, loc )
1190 se = Empty() + StringEnd()
1191 se._parse( instring, loc )
1192 except ParseBaseException as exc:
1193 if ParserElement.verbose_stacktrace:
1194 raise
1195 else:
1196
1197 raise exc
1198 else:
1199 return tokens
1200
1202 """Scan the input string for expression matches. Each match will return the
1203 matching tokens, start location, and end location. May be called with optional
1204 C{maxMatches} argument, to clip scanning after 'n' matches are found. If
1205 C{overlap} is specified, then overlapping matches will be reported.
1206
1207 Note that the start and end locations are reported relative to the string
1208 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1209 strings with embedded tabs."""
1210 if not self.streamlined:
1211 self.streamline()
1212 for e in self.ignoreExprs:
1213 e.streamline()
1214
1215 if not self.keepTabs:
1216 instring = _ustr(instring).expandtabs()
1217 instrlen = len(instring)
1218 loc = 0
1219 preparseFn = self.preParse
1220 parseFn = self._parse
1221 ParserElement.resetCache()
1222 matches = 0
1223 try:
1224 while loc <= instrlen and matches < maxMatches:
1225 try:
1226 preloc = preparseFn( instring, loc )
1227 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1228 except ParseException:
1229 loc = preloc+1
1230 else:
1231 if nextLoc > loc:
1232 matches += 1
1233 yield tokens, preloc, nextLoc
1234 if overlap:
1235 nextloc = preparseFn( instring, loc )
1236 if nextloc > loc:
1237 loc = nextLoc
1238 else:
1239 loc += 1
1240 else:
1241 loc = nextLoc
1242 else:
1243 loc = preloc+1
1244 except ParseBaseException as exc:
1245 if ParserElement.verbose_stacktrace:
1246 raise
1247 else:
1248
1249 raise exc
1250
1283
1285 """Another extension to C{L{scanString}}, simplifying the access to the tokens found
1286 to match the given parse expression. May be called with optional
1287 C{maxMatches} argument, to clip searching after 'n' matches are found.
1288 """
1289 try:
1290 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1291 except ParseBaseException as exc:
1292 if ParserElement.verbose_stacktrace:
1293 raise
1294 else:
1295
1296 raise exc
1297
1299 """Implementation of + operator - returns C{L{And}}"""
1300 if isinstance( other, basestring ):
1301 other = ParserElement.literalStringClass( other )
1302 if not isinstance( other, ParserElement ):
1303 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1304 SyntaxWarning, stacklevel=2)
1305 return None
1306 return And( [ self, other ] )
1307
1309 """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
1310 if isinstance( other, basestring ):
1311 other = ParserElement.literalStringClass( other )
1312 if not isinstance( other, ParserElement ):
1313 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1314 SyntaxWarning, stacklevel=2)
1315 return None
1316 return other + self
1317
1319 """Implementation of - operator, returns C{L{And}} with error stop"""
1320 if isinstance( other, basestring ):
1321 other = ParserElement.literalStringClass( other )
1322 if not isinstance( other, ParserElement ):
1323 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1324 SyntaxWarning, stacklevel=2)
1325 return None
1326 return And( [ self, And._ErrorStop(), other ] )
1327
1329 """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
1330 if isinstance( other, basestring ):
1331 other = ParserElement.literalStringClass( other )
1332 if not isinstance( other, ParserElement ):
1333 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1334 SyntaxWarning, stacklevel=2)
1335 return None
1336 return other - self
1337
1339 """Implementation of * operator, allows use of C{expr * 3} in place of
1340 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer
1341 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
1342 may also include C{None} as in:
1343 - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1344 to C{expr*n + L{ZeroOrMore}(expr)}
1345 (read as "at least n instances of C{expr}")
1346 - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1347 (read as "0 to n instances of C{expr}")
1348 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1349 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1350
1351 Note that C{expr*(None,n)} does not raise an exception if
1352 more than n exprs exist in the input stream; that is,
1353 C{expr*(None,n)} does not enforce a maximum number of expr
1354 occurrences. If this behavior is desired, then write
1355 C{expr*(None,n) + ~expr}
1356
1357 """
1358 if isinstance(other,int):
1359 minElements, optElements = other,0
1360 elif isinstance(other,tuple):
1361 other = (other + (None, None))[:2]
1362 if other[0] is None:
1363 other = (0, other[1])
1364 if isinstance(other[0],int) and other[1] is None:
1365 if other[0] == 0:
1366 return ZeroOrMore(self)
1367 if other[0] == 1:
1368 return OneOrMore(self)
1369 else:
1370 return self*other[0] + ZeroOrMore(self)
1371 elif isinstance(other[0],int) and isinstance(other[1],int):
1372 minElements, optElements = other
1373 optElements -= minElements
1374 else:
1375 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1376 else:
1377 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1378
1379 if minElements < 0:
1380 raise ValueError("cannot multiply ParserElement by negative value")
1381 if optElements < 0:
1382 raise ValueError("second tuple value must be greater or equal to first tuple value")
1383 if minElements == optElements == 0:
1384 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1385
1386 if (optElements):
1387 def makeOptionalList(n):
1388 if n>1:
1389 return Optional(self + makeOptionalList(n-1))
1390 else:
1391 return Optional(self)
1392 if minElements:
1393 if minElements == 1:
1394 ret = self + makeOptionalList(optElements)
1395 else:
1396 ret = And([self]*minElements) + makeOptionalList(optElements)
1397 else:
1398 ret = makeOptionalList(optElements)
1399 else:
1400 if minElements == 1:
1401 ret = self
1402 else:
1403 ret = And([self]*minElements)
1404 return ret
1405
1408
1410 """Implementation of | operator - returns C{L{MatchFirst}}"""
1411 if isinstance( other, basestring ):
1412 other = ParserElement.literalStringClass( other )
1413 if not isinstance( other, ParserElement ):
1414 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1415 SyntaxWarning, stacklevel=2)
1416 return None
1417 return MatchFirst( [ self, other ] )
1418
1420 """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
1421 if isinstance( other, basestring ):
1422 other = ParserElement.literalStringClass( other )
1423 if not isinstance( other, ParserElement ):
1424 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1425 SyntaxWarning, stacklevel=2)
1426 return None
1427 return other | self
1428
1430 """Implementation of ^ operator - returns C{L{Or}}"""
1431 if isinstance( other, basestring ):
1432 other = ParserElement.literalStringClass( other )
1433 if not isinstance( other, ParserElement ):
1434 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1435 SyntaxWarning, stacklevel=2)
1436 return None
1437 return Or( [ self, other ] )
1438
1440 """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
1441 if isinstance( other, basestring ):
1442 other = ParserElement.literalStringClass( other )
1443 if not isinstance( other, ParserElement ):
1444 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1445 SyntaxWarning, stacklevel=2)
1446 return None
1447 return other ^ self
1448
1450 """Implementation of & operator - returns C{L{Each}}"""
1451 if isinstance( other, basestring ):
1452 other = ParserElement.literalStringClass( other )
1453 if not isinstance( other, ParserElement ):
1454 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1455 SyntaxWarning, stacklevel=2)
1456 return None
1457 return Each( [ self, other ] )
1458
1460 """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
1461 if isinstance( other, basestring ):
1462 other = ParserElement.literalStringClass( other )
1463 if not isinstance( other, ParserElement ):
1464 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1465 SyntaxWarning, stacklevel=2)
1466 return None
1467 return other & self
1468
1470 """Implementation of ~ operator - returns C{L{NotAny}}"""
1471 return NotAny( self )
1472
1474 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
1475 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1476 could be written as::
1477 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1478
1479 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1480 passed as C{True}.
1481
1482 If C{name} is omitted, same as calling C{L{copy}}.
1483 """
1484 if name is not None:
1485 return self.setResultsName(name)
1486 else:
1487 return self.copy()
1488
1490 """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
1491 cluttering up returned output.
1492 """
1493 return Suppress( self )
1494
1496 """Disables the skipping of whitespace before matching the characters in the
1497 C{ParserElement}'s defined pattern. This is normally only used internally by
1498 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1499 """
1500 self.skipWhitespace = False
1501 return self
1502
1504 """Overrides the default whitespace chars
1505 """
1506 self.skipWhitespace = True
1507 self.whiteChars = chars
1508 self.copyDefaultWhiteChars = False
1509 return self
1510
1512 """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
1513 Must be called before C{parseString} when the input grammar contains elements that
1514 match C{<TAB>} characters."""
1515 self.keepTabs = True
1516 return self
1517
1519 """Define expression to be ignored (e.g., comments) while doing pattern
1520 matching; may be called repeatedly, to define multiple comment or other
1521 ignorable patterns.
1522 """
1523 if isinstance(other, basestring):
1524 other = Suppress(other)
1525
1526 if isinstance( other, Suppress ):
1527 if other not in self.ignoreExprs:
1528 self.ignoreExprs.append(other)
1529 else:
1530 self.ignoreExprs.append( Suppress( other.copy() ) )
1531 return self
1532
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching.

    Any action passed as a false value (e.g. C{None}) is replaced by the
    corresponding module default action.  Sets C{self.debug} to True and
    returns C{self}.
    """
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction

    self.debugActions = (startAction, successAction, exceptionAction)
    self.debug = True
    return self
1540
1542 """Enable display of debugging messages while doing pattern matching.
1543 Set C{flag} to True to enable, False to disable."""
1544 if flag:
1545 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1546 else:
1547 self.debug = False
1548 return self
1549
1552
1555
1557 self.streamlined = True
1558 self.strRepr = None
1559 return self
1560
1563
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is not read by this implementation; its default is
    C{None} rather than a shared mutable list.  The check itself is
    delegated to C{self.checkRecursion}, called with a fresh empty list.
    """
    self.checkRecursion( [] )
1567
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    The contents are passed to C{parseString} together with C{parseAll},
    and its result is returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method: treat the argument as a filename; the with-block
        # guarantees the handle is closed even if read() raises
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # re-raise from this frame instead of propagating as-is
            raise exc
1587
1589 if isinstance(other, ParserElement):
1590 return self is other or vars(self) == vars(other)
1591 elif isinstance(other, basestring):
1592 try:
1593 self.parseString(_ustr(other), parseAll=True)
1594 return True
1595 except ParseBaseException:
1596 return False
1597 else:
1598 return super(ParserElement,self)==other
1599
1601 return not (self == other)
1602
1604 return hash(id(self))
1605
1607 return self == other
1608
1610 return not (self == other)
1611
def runTests(self, tests, parseAll=False):
    """Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests
    """
    if isinstance(tests, basestring):
        # call strip() on each line itself rather than through str.strip, so
        # unicode lines are handled as well as str lines
        tests = [t.strip() for t in tests.splitlines()]
    for t in tests:
        out = [t]
        try:
            out.append(self.parseString(t, parseAll=parseAll).dump())
        except ParseException as pe:
            if '\n' in t:
                # multi-line test: show the failing line with a caret under the column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^')
            else:
                out.append(' '*pe.loc + '^')
            out.append(str(pe))
        out.append('')
        print('\n'.join(out))
1636
1637
1638 -class Token(ParserElement):
1639 """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
1642
1643
1644 -class Empty(Token):
1645 """An empty token, will always match."""
1647 super(Empty,self).__init__()
1648 self.name = "Empty"
1649 self.mayReturnEmpty = True
1650 self.mayIndexError = False
1651
1654 """A token that will never match."""
1656 super(NoMatch,self).__init__()
1657 self.name = "NoMatch"
1658 self.mayReturnEmpty = True
1659 self.mayIndexError = False
1660 self.errmsg = "Unmatchable token"
1661
1662 - def parseImpl( self, instring, loc, doActions=True ):
1664
1667 """Token to exactly match a specified string."""
1669 super(Literal,self).__init__()
1670 self.match = matchString
1671 self.matchLen = len(matchString)
1672 try:
1673 self.firstMatchChar = matchString[0]
1674 except IndexError:
1675 warnings.warn("null string passed to Literal; use Empty() instead",
1676 SyntaxWarning, stacklevel=2)
1677 self.__class__ = Empty
1678 self.name = '"%s"' % _ustr(self.match)
1679 self.errmsg = "Expected " + self.name
1680 self.mayReturnEmpty = False
1681 self.mayIndexError = False
1682
1683
1684
1685
1686
def parseImpl( self, instring, loc, doActions=True ):
    """Match C{self.match} at C{loc}.

    Returns C{(loc + self.matchLen, self.match)} on success, otherwise
    raises C{ParseException}.
    """
    # cheap first-character test before the full comparison
    if instring[loc] != self.firstMatchChar:
        raise ParseException(instring, loc, self.errmsg, self)
    if self.matchLen != 1 and not instring.startswith(self.match, loc):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc+self.matchLen, self.match
1692 _L = Literal
1693 ParserElement.literalStringClass = Literal
1696 """Token to exactly match a specified string as a keyword, that is, it must be
1697 immediately followed by a non-keyword character. Compare with C{L{Literal}}::
1698 Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
1699 Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
1700 Accepts two optional constructor arguments in addition to the keyword string:
1701 C{identChars} is a string of characters that would be valid identifier characters,
1702 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
1703 matching, default is C{False}.
1704 """
1705 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1706
1708 super(Keyword,self).__init__()
1709 self.match = matchString
1710 self.matchLen = len(matchString)
1711 try:
1712 self.firstMatchChar = matchString[0]
1713 except IndexError:
1714 warnings.warn("null string passed to Keyword; use Empty() instead",
1715 SyntaxWarning, stacklevel=2)
1716 self.name = '"%s"' % self.match
1717 self.errmsg = "Expected " + self.name
1718 self.mayReturnEmpty = False
1719 self.mayIndexError = False
1720 self.caseless = caseless
1721 if caseless:
1722 self.caselessmatch = matchString.upper()
1723 identChars = identChars.upper()
1724 self.identChars = set(identChars)
1725
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at C{loc}, requiring that the characters just
    before and just after it (when present) are not in C{self.identChars}.

    With C{self.caseless} set, the comparison and both boundary checks use
    upper-cased input.  Returns C{(loc + self.matchLen, self.match)} or
    raises C{ParseException}.
    """
    end = loc + self.matchLen
    idchars = self.identChars

    if self.caseless:
        matched = (
            instring[ loc:end ].upper() == self.caselessmatch
            and (loc >= len(instring)-self.matchLen or instring[end].upper() not in idchars)
            and (loc == 0 or instring[loc-1].upper() not in idchars)
        )
    else:
        matched = (
            instring[loc] == self.firstMatchChar
            and (self.matchLen==1 or instring.startswith(self.match,loc))
            and (loc >= len(instring)-self.matchLen or instring[end] not in idchars)
            and (loc == 0 or instring[loc-1] not in idchars)
        )

    if matched:
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1739
1744
1745 @staticmethod
1750
1752 """Token to match a specified string, ignoring case of letters.
1753 Note: the matched results will always be in the case of the given
1754 match string, NOT the case of the input text.
1755 """
1757 super(CaselessLiteral,self).__init__( matchString.upper() )
1758
1759 self.returnString = matchString
1760 self.name = "'%s'" % self.returnString
1761 self.errmsg = "Expected " + self.name
1762
def parseImpl( self, instring, loc, doActions=True ):
    """Compare the upper-cased input slice at C{loc} with C{self.match}.

    Returns C{(loc + self.matchLen, self.returnString)} on a match,
    otherwise raises C{ParseException}.
    """
    end = loc + self.matchLen
    if instring[ loc:end ].upper() != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return end, self.returnString
1767
1771
def parseImpl( self, instring, loc, doActions=True ):
    """Case-insensitive keyword match at C{loc}.

    The upper-cased slice must equal C{self.caselessmatch}, and the
    character following it (when present) must not be in
    C{self.identChars}.  Returns C{(loc + self.matchLen, self.match)} or
    raises C{ParseException}.
    """
    end = loc + self.matchLen
    text_ok = instring[ loc:end ].upper() == self.caselessmatch
    if text_ok and (loc >= len(instring)-self.matchLen
                    or instring[end].upper() not in self.identChars):
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1777
1779 """Token for matching words composed of allowed character sets.
1780 Defined with string containing all allowed initial characters,
1781 an optional string containing allowed body characters (if omitted,
1782 defaults to the initial character set), and an optional minimum,
1783 maximum, and/or exact length. The default value for C{min} is 1 (a
1784 minimum value < 1 is not valid); the default values for C{max} and C{exact}
1785 are 0, meaning no maximum or exact length restriction. An optional
1786 C{excludeChars} parameter can list characters that might be found in
1787 the input C{bodyChars} string; useful to define a word of all printables
1788 except for one or two characters, for instance.
1789 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
    """Set up the character sets, length limits and (when possible) an
    equivalent compiled regular expression for this word.

     - initChars - characters allowed as the first character
     - bodyChars - characters allowed after the first (falls back to C{initChars} when not given)
     - min, max, exact - length limits; C{min} < 1 raises C{ValueError}, C{max}/C{exact} of 0 mean "not specified"
     - asKeyword - when True, the match must not touch adjacent body characters
     - excludeChars - characters removed from C{initChars} and C{bodyChars}
    """
    super(Word,self).__init__()
    if excludeChars:
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars :
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    # A regex is only built for the default length limits and when no
    # space character is among the allowed characters.
    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # If the regex cannot be compiled, leave self.re unset (None);
            # KeyboardInterrupt/SystemExit are no longer swallowed here.
            self.re = None
1843
def parseImpl( self, instring, loc, doActions=True ):
    """Match a word at C{loc}.

    When C{self.re} is set, the compiled regex does all the work.
    Otherwise the first character must be in C{self.initChars}, following
    characters are consumed while they are in C{self.bodyChars} (up to
    C{self.maxLen}), and the length / keyword-boundary conditions are then
    checked.  Returns C{(end_loc, matched_text)} or raises
    C{ParseException}.
    """
    if self.re:
        m = self.re.match(instring,loc)
        if not m:
            raise ParseException(instring, loc, self.errmsg, self)
        return m.end(), m.group()

    if instring[ loc ] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    body = self.bodyChars
    total = len(instring)
    limit = min( start + self.maxLen, total )

    loc += 1
    while loc < limit and instring[loc] in body:
        loc += 1

    too_short = loc - start < self.minLen
    # an explicit max was given but more body characters follow
    overrun = self.maxSpecified and loc < total and instring[loc] in body
    # keyword mode: neither neighbour of the match may be a body character
    touching = self.asKeyword and (
        (start>0 and instring[start-1] in body) or (loc<total and instring[loc] in body)
    )

    if too_short or overrun or touching:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
1878
1880 try:
1881 return super(Word,self).__str__()
1882 except:
1883 pass
1884
1885
1886 if self.strRepr is None:
1887
1888 def charsAsStr(s):
1889 if len(s)>4:
1890 return s[:4]+"..."
1891 else:
1892 return s
1893
1894 if ( self.initCharsOrig != self.bodyCharsOrig ):
1895 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1896 else:
1897 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1898
1899 return self.strRepr
1900
1901
1902 -class Regex(Token):
1903 """Token for matching strings that match a given regular expression.
1904 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1905 """
1906 compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0):
    """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already compiled regular expression object,
    in which case it is used directly.  Any other type raises
    C{ValueError}; a string that fails to compile emits a
    C{SyntaxWarning} and the compile error is re-raised.
    """
    super(Regex,self).__init__()

    if isinstance(pattern, basestring):
        if not pattern:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            # re.error is the exception class raised by re.compile
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # keep the pattern source text, not the repr of the compiled object
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
1940
def parseImpl( self, instring, loc, doActions=True ):
    """Apply the compiled regex at C{loc}.

    Returns C{(match_end, ParseResults)} where the results hold the full
    matched text plus one named entry per named group of the regex.
    Raises C{ParseException} when the regex does not match.
    """
    m = self.re.match(instring,loc)
    if not m:
        raise ParseException(instring, loc, self.errmsg, self)

    tokens = ParseResults(m.group())
    # expose named groups as named results
    for group_name, group_text in m.groupdict().items():
        tokens[group_name] = group_text
    return m.end(), tokens
1953
1955 try:
1956 return super(Regex,self).__str__()
1957 except:
1958 pass
1959
1960 if self.strRepr is None:
1961 self.strRepr = "Re:(%s)" % repr(self.pattern)
1962
1963 return self.strRepr
1964
1967 """Token for matching strings that are delimited by quoting characters.
1968 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    r"""Defined with the following parameters:
     - quoteChar - string of one or more characters defining the quote delimiting string
     - escChar - character to escape quotes, typically backslash (default=None)
     - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
     - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
     - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
     - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
     - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Raises C{SyntaxError} (after a C{SyntaxWarning}) when C{quoteChar} or
    C{endQuoteChar} is empty once surrounding whitespace is stripped.
    """
    super(QuotedString,self).__init__()

    # surrounding whitespace is stripped from the quote strings
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError("quoteChar cannot be the empty string")

    if endQuoteChar is None:
        endQuoteChar = quoteChar
    else:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError("endQuoteChar cannot be the empty string")

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    # Single-line strings additionally exclude CR/LF from the body
    # character class; multiline strings let the body span lines.
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        lineBreakExclusion = ''
    else:
        self.flags = 0
        lineBreakExclusion = r'\n\r'
    if escChar is not None:
        escCharExclusion = _escapeRegexRangeChars(escChar)
    else:
        escCharExclusion = ''
    self.pattern = r'%s(?:[^%s%s%s]' % \
        ( re.escape(self.quoteChar),
          _escapeRegexRangeChars(self.endQuoteChar[0]),
          lineBreakExclusion,
          escCharExclusion )

    if len(self.endQuoteChar) > 1:
        # allow proper prefixes of a multi-character end quote inside the
        # body, as long as the next character does not continue the end quote
        self.pattern += (
            '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                           _escapeRegexRangeChars(self.endQuoteChar[i]))
                                for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
            )
    if escQuote:
        self.pattern += (r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        self.pattern += (r'|(?:%s.)' % re.escape(escChar))
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except re.error:
        # re.error is the exception class raised by re.compile
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2042
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string at C{loc}.

    Returns C{(match_end, text)}.  When C{self.unquoteResults} is set, the
    quote delimiters are removed and, in this order: escaped whitespace
    literals are converted (if C{self.convertWhitespaceEscapes}), the
    escape character is dropped from escaped characters (if C{self.escChar}
    is set), and C{self.escQuote} is replaced by C{self.endQuoteChar} (if
    set).  Raises C{ParseException} when there is no match.
    """
    # cheap first-character test before running the regex
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring,loc)
    else:
        result = None
    if not result:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        # ret is always a string here (it comes from the regex match), so
        # no type check is needed before the string processing below

        # replace escaped whitespace
        if '\\' in ret and self.convertWhitespaceEscapes:
            ws_map = (
                (r'\t', '\t'),
                (r'\n', '\n'),
                (r'\f', '\f'),
                (r'\r', '\r'),
            )
            for wslit,wschar in ws_map:
                ret = ret.replace(wslit, wschar)

        # replace escaped characters; raw string keeps \g<1> a valid
        # replacement template without an invalid string escape
        if self.escChar:
            ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

        # replace escaped quotes
        if self.escQuote:
            ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
2077
2079 try:
2080 return super(QuotedString,self).__str__()
2081 except:
2082 pass
2083
2084 if self.strRepr is None:
2085 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
2086
2087 return self.strRepr
2088
2091 """Token for matching words composed of characters *not* in a given set.
2092 Defined with string containing all disallowed characters, and an optional
2093 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
2094 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2095 are 0, meaning no maximum or exact length restriction.
2096 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Store the disallowed characters and the length limits.

     - notChars - characters that end the match
     - min - minimum length; a value < 1 raises C{ValueError}
     - max - maximum length, 0 meaning no limit (C{_MAX_INT})
     - exact - when > 0, used as both minimum and maximum length

    Whitespace skipping is turned off for this element.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    if exact > 0:
        # an exact length overrides both limits
        self.minLen = exact
        self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.mayIndexError = False
2120
def parseImpl( self, instring, loc, doActions=True ):
    """Consume a run of characters, none of which appear in C{self.notChars}.

    Returns C{(end_location, matched_text)}. Raises C{ParseException} when the
    character at C{loc} is excluded or the run is shorter than C{self.minLen}.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    # never scan further than maxLen characters or past the end of the input
    limit = min( begin+self.maxLen, len(instring) )
    loc += 1
    while loc < limit and instring[loc] not in excluded:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2137
2139 try:
2140 return super(CharsNotIn, self).__str__()
2141 except:
2142 pass
2143
2144 if self.strRepr is None:
2145 if len(self.notChars) > 4:
2146 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2147 else:
2148 self.strRepr = "!W:(%s)" % self.notChars
2149
2150 return self.strRepr
2151
2153 """Special matching class for matching whitespace. Normally, whitespace is ignored
2154 by pyparsing grammars. This class is included when some whitespace structures
2155 are significant. Define with a string containing the whitespace characters to be
2156 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2157 as defined for the C{L{Word}} class."""
2158 whiteStrs = {
2159 " " : "<SPC>",
2160 "\t": "<TAB>",
2161 "\n": "<LF>",
2162 "\r": "<CR>",
2163 "\f": "<FF>",
2164 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Configure a matcher for the whitespace characters in C{ws}.

    C{max} of 0 means no upper bound; a positive C{exact} overrides both
    C{min} and C{max}.
    """
    super(White,self).__init__()
    self.matchWhite = ws
    # characters this token must match can no longer be skipped as filler
    stillSkipped = [c for c in self.whiteChars if c not in ws]
    self.setWhitespaceChars( "".join(stillSkipped) )

    # the name is built from the symbolic labels in White.whiteStrs
    self.name = "".join(White.whiteStrs[c] for c in ws)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    if exact > 0:
        self.minLen = exact
        self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
2184
def parseImpl( self, instring, loc, doActions=True ):
    """Consume a run of characters drawn from C{self.matchWhite}.

    Returns C{(end_location, matched_text)}. Raises C{ParseException} when the
    character at C{loc} is not one of them or the run is shorter than
    C{self.minLen}.
    """
    wanted = self.matchWhite
    if instring[loc] not in wanted:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    # stop after maxLen characters or at the end of the input, whichever is first
    stop = min( begin + self.maxLen, len(instring) )
    loc += 1
    while loc < stop and instring[loc] in wanted:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2199
2203 super(_PositionToken,self).__init__()
2204 self.name=self.__class__.__name__
2205 self.mayReturnEmpty = True
2206 self.mayIndexError = False
2207
2209 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2213
2215 if col(loc,instring) != self.col:
2216 instrlen = len(instring)
2217 if self.ignoreExprs:
2218 loc = self._skipIgnorables( instring, loc )
2219 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2220 loc += 1
2221 return loc
2222
def parseImpl( self, instring, loc, doActions=True ):
    """Advance from C{loc} to column C{self.col}, returning the text skipped over.

    Raises C{ParseException} if the current column is already past C{self.col}.
    """
    current = col( loc, instring )
    if current > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    # move forward by the number of columns still missing
    target = loc + self.col - current
    return target, instring[ loc: target ]
2230
2232 """Matches if current position is at the beginning of a line within the parse string"""
2237
2239 preloc = super(LineStart,self).preParse(instring,loc)
2240 if instring[preloc] == "\n":
2241 loc += 1
2242 return loc
2243
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed (with no tokens) only when C{loc} is at the start of a line.

    That is: position 0, the position C{preParse} reaches from 0, or any
    position directly after a newline. Otherwise raises C{ParseException}.
    """
    atLineStart = (
        loc == 0
        or loc == self.preParse( instring, 0 )
        or instring[loc-1] == "\n"
    )
    if not atLineStart:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2250
2252 """Matches if current position is at the end of a line within the parse string"""
2257
def parseImpl( self, instring, loc, doActions=True ):
    """Match a newline at C{loc}, or the end of the input.

    Returns C{(loc+1, "\\n")} for a newline and C{(loc+1, [])} exactly at the
    end of the string; any other position raises C{ParseException}.
    """
    end = len(instring)
    if loc < end and instring[loc] == "\n":
        return loc+1, "\n"
    if loc == end:
        return loc+1, []
    # either a non-newline character, or a position beyond the end of the input
    raise ParseException(instring, loc, self.errmsg, self)
2268
2270 """Matches if current position is at the beginning of the parse string"""
2274
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed (with no tokens) only at the beginning of the parse string.

    A non-zero C{loc} still qualifies when it is exactly where C{preParse}
    ends up when started from position 0; otherwise C{ParseException} is raised.
    """
    if loc != 0 and loc != self.preParse( instring, 0 ):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2281
2283 """Matches if current position is at the end of the parse string"""
2287
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed (with no tokens) only at or beyond the end of the parse string.

    Returns C{(loc+1, [])} when C{loc} is exactly the end of the input and
    C{(loc, [])} when it is already past the end. Raises C{ParseException}
    when input remains at C{loc}.
    """
    end = len(instring)
    if loc < end:
        raise ParseException(instring, loc, self.errmsg, self)
    if loc == end:
        # step past the end so a repeated end-of-string match takes the branch below
        return loc+1, []
    # loc > end is the only case left; the original trailing else could never run
    return loc, []
2297
2299 """Matches if the current position is at the beginning of a Word, and
2300 is not preceded by any character in a given set of C{wordChars}
2301 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2302 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
2303 the string being parsed, or at the beginning of a line.
2304 """
2306 super(WordStart,self).__init__()
2307 self.wordChars = set(wordChars)
2308 self.errmsg = "Not at the start of a word"
2309
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed (with no tokens) when C{loc} is at the start of a word.

    Position 0 always qualifies. Elsewhere the previous character must not be
    in C{self.wordChars} and the current one must be; otherwise
    C{ParseException} is raised.
    """
    if loc != 0:
        wordChars = self.wordChars
        afterWordChar = instring[loc-1] in wordChars
        if afterWordChar or instring[loc] not in wordChars:
            raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2316
2318 """Matches if the current position is at the end of a Word, and
2319 is not followed by any character in a given set of C{wordChars}
2320 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2321 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
2322 the string being parsed, or at the end of a line.
2323 """
2325 super(WordEnd,self).__init__()
2326 self.wordChars = set(wordChars)
2327 self.skipWhitespace = False
2328 self.errmsg = "Not at the end of a word"
2329
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed (with no tokens) when C{loc} is at the end of a word.

    Any position at or past the end of the input qualifies. Elsewhere the
    current character must not be in C{self.wordChars} and the previous
    character must be; otherwise C{ParseException} is raised.
    """
    instrlen = len(instring)
    if instrlen>0 and loc<instrlen:
        # At loc == 0 there is no previous character. Testing instring[loc-1]
        # there would wrap around to the LAST character of the input, so the
        # start of the string is rejected explicitly instead.
        if (instring[loc] in self.wordChars or
            loc == 0 or
            instring[loc-1] not in self.wordChars):
            raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2337
2340 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Store the contained expressions in C{self.exprs} as a list.

    C{exprs} may be a single string (wrapped in a C{Literal}), a generator,
    a sequence (a sequence consisting only of strings has every item wrapped
    in a C{Literal}), any other iterable, or a single non-iterable object
    (stored as a one-element list).
    """
    # collections.Sequence was removed in Python 3.10. The ABC lives in
    # collections.abc on Python 3 and directly in collections on Python 2.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, _generatorType ):
        exprs = list(exprs)

    if isinstance( exprs, basestring ):
        self.exprs = [ Literal( exprs ) ]
    elif isinstance( exprs, Sequence ):
        # if sequence of strings provided, wrap with Literal
        if all(isinstance(expr, basestring) for expr in exprs):
            exprs = map(Literal, exprs)
        self.exprs = list(exprs)
    else:
        try:
            self.exprs = list( exprs )
        except TypeError:
            # not iterable at all: treat it as a single expression
            self.exprs = [ exprs ]
    self.callPreparse = False
2359
2361 return self.exprs[i]
2362
2364 self.exprs.append( other )
2365 self.strRepr = None
2366 return self
2367
2369 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
2370 all contained expressions."""
2371 self.skipWhitespace = False
2372 self.exprs = [ e.copy() for e in self.exprs ]
2373 for e in self.exprs:
2374 e.leaveWhitespace()
2375 return self
2376
2378 if isinstance( other, Suppress ):
2379 if other not in self.ignoreExprs:
2380 super( ParseExpression, self).ignore( other )
2381 for e in self.exprs:
2382 e.ignore( self.ignoreExprs[-1] )
2383 else:
2384 super( ParseExpression, self).ignore( other )
2385 for e in self.exprs:
2386 e.ignore( self.ignoreExprs[-1] )
2387 return self
2388
2390 try:
2391 return super(ParseExpression,self).__str__()
2392 except:
2393 pass
2394
2395 if self.strRepr is None:
2396 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
2397 return self.strRepr
2398
2400 super(ParseExpression,self).streamline()
2401
2402 for e in self.exprs:
2403 e.streamline()
2404
2405
2406
2407
2408 if ( len(self.exprs) == 2 ):
2409 other = self.exprs[0]
2410 if ( isinstance( other, self.__class__ ) and
2411 not(other.parseAction) and
2412 other.resultsName is None and
2413 not other.debug ):
2414 self.exprs = other.exprs[:] + [ self.exprs[1] ]
2415 self.strRepr = None
2416 self.mayReturnEmpty |= other.mayReturnEmpty
2417 self.mayIndexError |= other.mayIndexError
2418
2419 other = self.exprs[-1]
2420 if ( isinstance( other, self.__class__ ) and
2421 not(other.parseAction) and
2422 other.resultsName is None and
2423 not other.debug ):
2424 self.exprs = self.exprs[:-1] + other.exprs[:]
2425 self.strRepr = None
2426 self.mayReturnEmpty |= other.mayReturnEmpty
2427 self.mayIndexError |= other.mayIndexError
2428
2429 self.errmsg = "Expected " + _ustr(self)
2430
2431 return self
2432
2436
def validate( self, validateTrace=[] ):
    """Validate every contained expression, then run the recursion check on self.

    Each child receives a copy of C{validateTrace} extended with this
    expression; the caller's list is not modified.
    """
    trace = validateTrace[:]+[self]
    for child in self.exprs:
        child.validate(trace)
    self.checkRecursion( [] )
2442
2447
2448 -class And(ParseExpression):
2449 """Requires all given C{ParseExpression}s to be found in the given order.
2450 Expressions may be separated by whitespace.
2451 May be constructed using the C{'+'} operator.
2452 """
2453
2459
def __init__( self, exprs, savelist = True ):
    """Build a sequence expression from C{exprs}.

    The sequence may return empty only if every element may. Its whitespace
    settings are copied from the first element, so C{exprs} must not be empty.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
    # whitespace handling for the whole sequence follows its first element
    first = self.exprs[0]
    self.setWhitespaceChars( first.whiteChars )
    self.skipWhitespace = first.skipWhitespace
    self.callPreparse = True
2466
def parseImpl( self, instring, loc, doActions=True ):
    """Parse each contained expression in order, accumulating their tokens.

    Once an C{And._ErrorStop} marker has been passed, a failure in any later
    element is re-raised as C{ParseSyntaxException}.
    """
    # the first element is parsed with callPreParse=False
    loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
    pastErrorStop = False
    for e in self.exprs[1:]:
        if isinstance(e, And._ErrorStop):
            # the marker itself parses nothing; it only changes how failures are reported
            pastErrorStop = True
            continue
        if not pastErrorStop:
            loc, exprtokens = e._parse( instring, loc, doActions )
        else:
            try:
                loc, exprtokens = e._parse( instring, loc, doActions )
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException(pe)
            except IndexError:
                raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
        if exprtokens or exprtokens.haskeys():
            resultlist += exprtokens
    return loc, resultlist
2491
2493 if isinstance( other, basestring ):
2494 other = Literal( other )
2495 return self.append( other )
2496
2498 subRecCheckList = parseElementList[:] + [ self ]
2499 for e in self.exprs:
2500 e.checkRecursion( subRecCheckList )
2501 if not e.mayReturnEmpty:
2502 break
2503
2505 if hasattr(self,"name"):
2506 return self.name
2507
2508 if self.strRepr is None:
2509 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
2510
2511 return self.strRepr
2512
2513
2514 -class Or(ParseExpression):
2515 """Requires that at least one C{ParseExpression} is found.
2516 If two expressions match, the expression that matches the longest string will be used.
2517 May be constructed using the C{'^'} operator.
2518 """
def __init__( self, exprs, savelist = False ):
    """Build a longest-match alternation from C{exprs}.

    The alternation may return empty if any alternative may, or if there are
    no alternatives at all.
    """
    super(Or,self).__init__(exprs, savelist)
    if not self.exprs:
        self.mayReturnEmpty = True
    else:
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
2525
def parseImpl( self, instring, loc, doActions=True ):
    """Try every alternative and return the result of the longest match.

    Each alternative is first probed with C{tryParse}. The successful ones are
    then really parsed, longest probe first, and the first one whose real parse
    succeeds is returned. If nothing matches, the exception that got furthest
    into the input is re-raised with this expression's C{errmsg}.
    """
    furthestLoc = -1
    furthestExc = None
    candidates = []
    for alt in self.exprs:
        try:
            endloc = alt.tryParse( instring, loc )
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthestLoc:
                furthestExc = err
                furthestLoc = err.loc
        except IndexError:
            # treat running off the input as a failure at the end of the string
            if len(instring) > furthestLoc:
                furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                furthestLoc = len(instring)
        else:
            # remember every successful probe so they can be retried longest first
            candidates.append((endloc, alt))

    if candidates:
        candidates.sort(key=lambda pair: -pair[0])
        for _endloc, alt in candidates:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc

    if furthestExc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthestExc.msg = self.errmsg
    raise furthestExc
2562
2563
2565 if isinstance( other, basestring ):
2566 other = ParserElement.literalStringClass( other )
2567 return self.append( other )
2568
2570 if hasattr(self,"name"):
2571 return self.name
2572
2573 if self.strRepr is None:
2574 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
2575
2576 return self.strRepr
2577
2579 subRecCheckList = parseElementList[:] + [ self ]
2580 for e in self.exprs:
2581 e.checkRecursion( subRecCheckList )
2582
2585 """Requires that at least one C{ParseExpression} is found.
2586 If two expressions match, the first one listed is the one that will match.
2587 May be constructed using the C{'|'} operator.
2588 """
def __init__( self, exprs, savelist = False ):
    """Build a first-match alternation from C{exprs}.

    The alternation may return empty if any alternative may, or if there are
    no alternatives at all.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    if not self.exprs:
        self.mayReturnEmpty = True
    else:
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
2595
def parseImpl( self, instring, loc, doActions=True ):
    """Return the result of the first alternative that parses at C{loc}.

    If none matches, the exception that got furthest into the input is
    re-raised with this expression's C{errmsg}; with no alternatives at all a
    fresh C{ParseException} is raised.
    """
    furthestLoc = -1
    furthestExc = None
    for alt in self.exprs:
        try:
            return alt._parse( instring, loc, doActions )
        except ParseException as err:
            if err.loc > furthestLoc:
                furthestExc = err
                furthestLoc = err.loc
        except IndexError:
            # treat running off the input as a failure at the end of the string
            if len(instring) > furthestLoc:
                furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                furthestLoc = len(instring)

    # only reached when no alternative matched
    if furthestExc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthestExc.msg = self.errmsg
    raise furthestExc
2619
2621 if isinstance( other, basestring ):
2622 other = ParserElement.literalStringClass( other )
2623 return self.append( other )
2624
2626 if hasattr(self,"name"):
2627 return self.name
2628
2629 if self.strRepr is None:
2630 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
2631
2632 return self.strRepr
2633
2635 subRecCheckList = parseElementList[:] + [ self ]
2636 for e in self.exprs:
2637 e.checkRecursion( subRecCheckList )
2638
2639
2640 -class Each(ParseExpression):
2641 """Requires all given C{ParseExpression}s to be found, but in any order.
2642 Expressions may be separated by whitespace.
2643 May be constructed using the C{'&'} operator.
2644 """
def __init__( self, exprs, savelist = True ):
    """Build an any-order conjunction from C{exprs}.

    The expression may return empty only if every element may. Grouping of the
    elements is deferred until the first call to C{parseImpl}.
    """
    super(Each,self).__init__(exprs, savelist)
    # tells parseImpl that the element groups still have to be computed
    self.initExprGroups = True
    self.skipWhitespace = True
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
2650
def parseImpl( self, instring, loc, doActions=True ):
    """Match all contained expressions, in whatever order they occur in the input.

    A first pass probes with C{tryParse} to discover the order in which the
    elements match. A second pass parses them for real in that order and
    merges the results. Raises C{ParseException} if any required element
    never matched.
    """
    if self.initExprGroups:
        # one-time classification of the elements, cached on the instance
        exprs = self.exprs
        self.opt1map = dict((id(e.expr),e) for e in exprs if isinstance(e,Optional))
        wrappedOptionals = [ e.expr for e in exprs if isinstance(e,Optional) ]
        emptyCapable = [ e for e in exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
        self.optionals = wrappedOptionals + emptyCapable
        self.multioptionals = [ e.expr for e in exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in exprs if isinstance(e,OneOrMore) ]
        self.required = [ e for e in exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired
        self.initExprGroups = False

    probeLoc = loc
    pendingRequired = self.required[:]
    pendingOptional = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a full sweep matches nothing
    while True:
        sweep = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
        failed = []
        for candidate in sweep:
            try:
                probeLoc = candidate.tryParse( instring, probeLoc )
            except ParseException:
                failed.append(candidate)
            else:
                # record the Optional wrapper, when there is one, instead of its inner expr
                matchOrder.append(self.opt1map.get(id(candidate),candidate))
                if candidate in pendingRequired:
                    pendingRequired.remove(candidate)
                elif candidate in pendingOptional:
                    pendingOptional.remove(candidate)
        if len(failed) == len(sweep):
            break

    if pendingRequired:
        missing = ", ".join(_ustr(e) for e in pendingRequired)
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOptional]

    # second pass: parse for real, in the order discovered above
    resultlist = []
    for e in matchOrder:
        loc,results = e._parse(instring,loc,doActions)
        resultlist.append(results)

    finalResults = ParseResults()
    for r in resultlist:
        # names already present in the merged results get old and new values concatenated
        dups = {}
        for k in r.keys():
            if k in finalResults:
                combined = ParseResults(finalResults[k])
                combined += ParseResults(r[k])
                dups[k] = combined
        finalResults += ParseResults(r)
        for k,v in dups.items():
            finalResults[k] = v
    return loc, finalResults
2709
2711 if hasattr(self,"name"):
2712 return self.name
2713
2714 if self.strRepr is None:
2715 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
2716
2717 return self.strRepr
2718
2720 subRecCheckList = parseElementList[:] + [ self ]
2721 for e in self.exprs:
2722 e.checkRecursion( subRecCheckList )
2723
2726 """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
def __init__( self, expr, savelist=False ):
    """Wrap a single expression C{expr}; a string is converted to a C{Literal}.

    When C{expr} is not C{None}, its parsing flags, whitespace settings and
    ignore expressions are copied onto this wrapper.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        # nothing to inherit from
        return

    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars( expr.whiteChars )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
2741
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression, passing C{callPreParse=False}.

    Raises C{ParseException} when no expression has been set.
    """
    if self.expr is None:
        raise ParseException("",loc,self.errmsg,self)
    return self.expr._parse( instring, loc, doActions, callPreParse=False )
2747
2749 self.skipWhitespace = False
2750 self.expr = self.expr.copy()
2751 if self.expr is not None:
2752 self.expr.leaveWhitespace()
2753 return self
2754
2756 if isinstance( other, Suppress ):
2757 if other not in self.ignoreExprs:
2758 super( ParseElementEnhance, self).ignore( other )
2759 if self.expr is not None:
2760 self.expr.ignore( self.ignoreExprs[-1] )
2761 else:
2762 super( ParseElementEnhance, self).ignore( other )
2763 if self.expr is not None:
2764 self.expr.ignore( self.ignoreExprs[-1] )
2765 return self
2766
2772
2774 if self in parseElementList:
2775 raise RecursiveGrammarException( parseElementList+[self] )
2776 subRecCheckList = parseElementList[:] + [ self ]
2777 if self.expr is not None:
2778 self.expr.checkRecursion( subRecCheckList )
2779
def validate( self, validateTrace=[] ):
    """Validate the wrapped expression (if any), then run the recursion check on self.

    The child receives a copy of C{validateTrace} extended with this
    expression; the caller's list is not modified.
    """
    trace = validateTrace[:]+[self]
    inner = self.expr
    if inner is not None:
        inner.validate(trace)
    self.checkRecursion( [] )
2785
2787 try:
2788 return super(ParseElementEnhance,self).__str__()
2789 except:
2790 pass
2791
2792 if self.strRepr is None and self.expr is not None:
2793 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
2794 return self.strRepr
2795
2798 """Lookahead matching of the given parse expression. C{FollowedBy}
2799 does *not* advance the parsing position within the input string, it only
2800 verifies that the specified parse expression matches at the current
2801 position. C{FollowedBy} always returns a null token list."""
2805
def parseImpl( self, instring, loc, doActions=True ):
    """Check that the wrapped expression matches at C{loc} without consuming input.

    Returns the unchanged location and an empty token list; any exception
    raised by C{tryParse} propagates to the caller.
    """
    lookahead = self.expr
    # only success or failure matters here; the end location is discarded
    lookahead.tryParse( instring, loc )
    return loc, []
2809
2810
2811 -class NotAny(ParseElementEnhance):
2812 """Lookahead to disallow matching with the given parse expression. C{NotAny}
2813 does *not* advance the parsing position within the input string, it only
2814 verifies that the specified parse expression does *not* match at the current
2815 position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
2816 always returns a null token list. May be constructed using the '~' operator."""
2818 super(NotAny,self).__init__(expr)
2819
2820 self.skipWhitespace = False
2821 self.mayReturnEmpty = True
2822 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2823
2824 - def parseImpl( self, instring, loc, doActions=True ):
2828
2830 if hasattr(self,"name"):
2831 return self.name
2832
2833 if self.strRepr is None:
2834 self.strRepr = "~{" + _ustr(self.expr) + "}"
2835
2836 return self.strRepr
2837
2840 """Repetition of one or more of the given expression.
2841
2842 Parameters:
2843 - expr - expression that must match one or more times
2844 - stopOn - (default=None) - expression for a terminating sentinel
2845 (only required if the sentinel would ordinarily match the repetition
2846 expression)
2847 """
def __init__( self, expr, stopOn=None):
    """Wrap C{expr} for one-or-more repetition.

    C{stopOn}, when given, is stored negated (C{~stopOn}) in C{self.not_ender};
    a string C{stopOn} is first converted to a C{Literal}.
    """
    super(OneOrMore, self).__init__(expr)
    if isinstance(stopOn, basestring):
        stopOn = Literal(stopOn)
    if stopOn is None:
        self.not_ender = None
    else:
        self.not_ender = ~stopOn
2854
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression once, then as many more times as possible.

    The first match is mandatory, so its exceptions propagate. Later failures
    (C{ParseException} or C{IndexError}) simply end the repetition. When a
    stop expression is configured, it is checked before every attempt.
    """
    parseExpr = self.expr._parse
    skipIgnorables = self._skipIgnorables
    notEnder = self.not_ender
    rejectEnder = notEnder.tryParse if notEnder is not None else None

    # must be at least one (but first see if we are the stopOn sentinel;
    # if so, fail)
    if rejectEnder is not None:
        rejectEnder(instring, loc)
    loc, tokens = parseExpr( instring, loc, doActions, callPreParse=False )
    try:
        hasIgnoreExprs = bool(self.ignoreExprs)
        while True:
            if rejectEnder is not None:
                rejectEnder(instring, loc)
            preloc = skipIgnorables( instring, loc ) if hasIgnoreExprs else loc
            loc, moreTokens = parseExpr( instring, preloc, doActions )
            if moreTokens or moreTokens.haskeys():
                tokens += moreTokens
    except (ParseException,IndexError):
        # no further repetition; keep what has been collected so far
        pass

    return loc, tokens
2883
2885 if hasattr(self,"name"):
2886 return self.name
2887
2888 if self.strRepr is None:
2889 self.strRepr = "{" + _ustr(self.expr) + "}..."
2890
2891 return self.strRepr
2892
2897
2899 """Optional repetition of zero or more of the given expression.
2900
2901 Parameters:
2902 - expr - expression that must match zero or more times
2903 - stopOn - (default=None) - expression for a terminating sentinel
2904 (only required if the sentinel would ordinarily match the repetition
2905 expression)
2906 """
2907 - def __init__( self, expr, stopOn=None):
2910
2911 - def parseImpl( self, instring, loc, doActions=True ):
2916
2918 if hasattr(self,"name"):
2919 return self.name
2920
2921 if self.strRepr is None:
2922 self.strRepr = "[" + _ustr(self.expr) + "]..."
2923
2924 return self.strRepr
2925
2932
2933 _optionalNotMatched = _NullToken()
2935 """Optional matching of the given expression.
2936
2937 Parameters:
2938 - expr - expression that may match zero or one time
2939 - default (optional) - value to be returned if the optional expression
2940 is not found.
2941 """
2943 super(Optional,self).__init__( expr, savelist=False )
2944 self.defaultValue = default
2945 self.mayReturnEmpty = True
2946
def parseImpl( self, instring, loc, doActions=True ):
    """Parse the wrapped expression if it matches; otherwise succeed without consuming input.

    On a failed match the tokens are empty, unless a default value was given.
    In that case the default is returned, and also stored under the wrapped
    expression's results name when it has one.
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        if self.defaultValue is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ self.defaultValue ])
            tokens[self.expr.resultsName] = self.defaultValue
        else:
            tokens = [ self.defaultValue ]
    return loc, tokens
2960
2962 if hasattr(self,"name"):
2963 return self.name
2964
2965 if self.strRepr is None:
2966 self.strRepr = "[" + _ustr(self.expr) + "]"
2967
2968 return self.strRepr
2969
2970 -class SkipTo(ParseElementEnhance):
2971 """Token for skipping over all undefined text until the matched expression is found.
2972
2973 Parameters:
2974 - expr - target expression marking the end of the data to be skipped
2975 - include - (default=False) if True, the target expression is also parsed
2976 (the skipped text and target expression are returned as a 2-element list).
2977 - ignore - (default=None) used to define grammars (typically quoted strings and
2978 comments) that might contain false matches to the target expression
2979 - failOn - (default=None) define expressions that are not allowed to be
2980 included in the skipped text; if found before the target expression is found,
2981 the SkipTo is not a match
2982 """
def __init__( self, other, include=False, ignore=None, failOn=None ):
    """Configure a skip up to the target expression C{other}.

    C{include}, C{ignore} and C{failOn} are stored for use by C{parseImpl};
    a string C{failOn} is converted to a C{Literal}.
    """
    super( SkipTo, self ).__init__( other )
    self.ignoreExpr = ignore
    self.includeMatch = include
    self.failOn = Literal(failOn) if isinstance(failOn, basestring) else failOn
    self.asList = False
    self.mayReturnEmpty = True
    self.mayIndexError = False
    self.errmsg = "No match found for "+_ustr(self.expr)
2995
def parseImpl( self, instring, loc, doActions=True ):
    """Scan forward from C{loc} until the target expression matches.

    Returns the skipped text as a C{ParseResults}, followed by the target's
    own tokens when C{includeMatch} is set. Raises C{ParseException} if the
    end of the input is reached without a match.
    """
    startloc = loc
    instrlen = len(instring)
    parseTarget = self.expr._parse
    failOnMatches = self.failOn.canParseNext if self.failOn is not None else None
    skipIgnored = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

    tmploc = loc
    while tmploc <= instrlen:
        # NOTE(review): a failOn match only leaves the loop; nothing is raised
        # here, so this method goes on to return the text skipped so far.
        if failOnMatches is not None and failOnMatches(instring, tmploc):
            break

        if skipIgnored is not None:
            # advance past ignore expressions
            while True:
                try:
                    tmploc = skipIgnored(instring, tmploc)
                except ParseBaseException:
                    break

        try:
            parseTarget(instring, tmploc, doActions=False, callPreParse=False)
        except (ParseException, IndexError):
            # no match, advance loc in string
            tmploc += 1
        else:
            # matched skipto expr, done
            break

    else:
        # ran off the end of the input string without matching skipto expr, fail
        raise ParseException(instring, loc, self.errmsg, self)

    # build up return values
    loc = tmploc
    skipresult = ParseResults(instring[startloc:loc])

    if self.includeMatch:
        # parse the target for real this time, honouring doActions
        loc, mat = parseTarget(instring,loc,doActions,callPreParse=False)
        skipresult += mat

    return loc, skipresult
3042
3043 -class Forward(ParseElementEnhance):
3044 """Forward declaration of an expression to be defined later -
3045 used for recursive grammars, such as algebraic infix notation.
3046 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
3047
3048 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
3049 Specifically, '|' has a lower precedence than '<<', so that::
3050 fwdExpr << a | b | c
3051 will actually be evaluated as::
3052 (fwdExpr << a) | b | c
3053 thereby leaving b and c out as parseable alternatives. It is recommended that you
3054 explicitly group the values inserted into the C{Forward}::
3055 fwdExpr << (a | b | c)
3056 Converting to use the '<<=' operator instead will avoid this problem.
3057 """
3060
3062 if isinstance( other, basestring ):
3063 other = ParserElement.literalStringClass(other)
3064 self.expr = other
3065 self.strRepr = None
3066 self.mayIndexError = self.expr.mayIndexError
3067 self.mayReturnEmpty = self.expr.mayReturnEmpty
3068 self.setWhitespaceChars( self.expr.whiteChars )
3069 self.skipWhitespace = self.expr.skipWhitespace
3070 self.saveAsList = self.expr.saveAsList
3071 self.ignoreExprs.extend(self.expr.ignoreExprs)
3072 return self
3073
3075 return self << other
3076
3078 self.skipWhitespace = False
3079 return self
3080
3082 if not self.streamlined:
3083 self.streamlined = True
3084 if self.expr is not None:
3085 self.expr.streamline()
3086 return self
3087
def validate( self, validateTrace=[] ):
    """Validate the assigned expression once per trace, then run the recursion check.

    If this object is already in C{validateTrace}, the nested validation is
    skipped; the recursion check on self is run either way.
    """
    alreadySeen = self in validateTrace
    if not alreadySeen:
        trace = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(trace)
    self.checkRecursion([])
3094
3096 if hasattr(self,"name"):
3097 return self.name
3098 return self.__class__.__name__ + ": ..."
3099
3100
3101 self._revertClass = self.__class__
3102 self.__class__ = _ForwardNoRecurse
3103 try:
3104 if self.expr is not None:
3105 retString = _ustr(self.expr)
3106 else:
3107 retString = "None"
3108 finally:
3109 self.__class__ = self._revertClass
3110 return self.__class__.__name__ + ": " + retString
3111
3113 if self.expr is not None:
3114 return super(Forward,self).copy()
3115 else:
3116 ret = Forward()
3117 ret <<= self
3118 return ret
3119
3123
3125 """Abstract subclass of C{ParseExpression}, for converting parsed results."""
3126 - def __init__( self, expr, savelist=False ):
3129
3131 """Converter to concatenate all matching tokens to a single string.
3132 By default, the matching patterns must also be contiguous in the input string;
3133 this can be disabled by specifying C{'adjacent=False'} in the constructor.
3134 """
def __init__( self, expr, joinString="", adjacent=True ):
    """Wrap C{expr} so that its tokens are joined into one string with C{joinString}.

    With C{adjacent} true, C{leaveWhitespace()} is applied first; the
    C{Combine} itself is then set to skip leading whitespace again.
    """
    super(Combine,self).__init__( expr )
    # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
    if adjacent:
        self.leaveWhitespace()
    self.skipWhitespace = True
    self.callPreparse = True
    self.adjacent = adjacent
    self.joinString = joinString
3144
3151
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens with a single joined string.

    The result is built on a copy of C{tokenlist} whose items are deleted
    first. It is wrapped in a list when this expression has a results name
    and the copy still reports keys.
    """
    combined = tokenlist.copy()
    del combined[:]
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined += ParseResults([ joined ], modal=self.modalResults)

    if self.resultsName and combined.haskeys():
        return [ combined ]
    return combined
3161
3162 -class Group(TokenConverter):
3163 """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
3165 super(Group,self).__init__( expr )
3166 self.saveAsList = True
3167
def postParse( self, instring, loc, tokenlist ):
    """Nest the matched tokens one level deeper by wrapping them in a list."""
    grouped = [ tokenlist ]
    return grouped
3170
3171 -class Dict(TokenConverter):
3172 """Converter to return a repetitive expression as a list, but also as a dictionary.
3173 Each element can also be referenced using the first token in the expression as its key.
3174 Useful for tabular report scraping when the first column can be used as an item key.
3175 """
3177 super(Dict,self).__init__( expr )
3178 self.saveAsList = True
3179
def postParse( self, instring, loc, tokenlist ):
    """Add a named entry to C{tokenlist} for each non-empty token group.

    The first item of each group is the key (an int key is converted to a
    stripped string). The value is an empty string, the second item, or the
    remaining items, depending on the shape of the group. The token list is
    wrapped in a list when this expression has a results name.
    """
    for i,tok in enumerate(tokenlist):
        if len(tok) == 0:
            continue
        ikey = tok[0]
        if isinstance(ikey,int):
            ikey = _ustr(tok[0]).strip()

        if len(tok)==1:
            # key only
            value = ""
        elif len(tok)==2 and not isinstance(tok[1],ParseResults):
            # simple key/value pair
            value = tok[1]
        else:
            remainder = tok.copy()
            del remainder[0]
            # keep the remainder whole unless it is a single item and reports no keys
            if len(remainder)!= 1 or (isinstance(remainder,ParseResults) and remainder.haskeys()):
                value = remainder
            else:
                value = remainder[0]
        tokenlist[ikey] = _ParseResultsWithOffset(value,i)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
3203
3206 """Converter for ignoring the results of a parsed expression."""
3207 - def postParse( self, instring, loc, tokenlist ):
3209
3212
3215 """Wrapper for parse actions, to ensure they are only called once."""
3217 self.callable = _trim_arity(methodCall)
3218 self.called = False
3220 if not self.called:
3221 results = self.callable(s,l,t)
3222 self.called = True
3223 return results
3224 raise ParseException(s,l,"")
3227
3229 """Decorator for debugging parse actions."""
3230 f = _trim_arity(f)
def z(*paArgs):
    # Wrapper around the traced parse action f: reports entry, normal exit and
    # exceptions on stderr, then returns f's result or re-raises its exception.
    # Functions only have func_name on Python 2; __name__ works on 2 and 3.
    thisFunc = f.__name__
    # the last three positional arguments are (string, location, tokens)
    s,l,t = paArgs[-3:]
    if len(paArgs)>3:
        # an extra leading argument is taken to be the bound instance
        thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
    sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
    try:
        ret = f(*paArgs)
    except Exception as exc:
        sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
        raise
    sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
    return ret
3244 try:
3245 z.__name__ = f.__name__
3246 except AttributeError:
3247 pass
3248 return z
3249
3250
3251
3252
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to C{True}, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    # display name of the form "expr [delim expr]..."
    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
    if combine:
        return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
    return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
3266
def countedArray( expr, intExpr=None ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

       If C{intExpr} is given, a copy of it is used to parse the leading count instead
       of the default C{Word(nums)} converted to C{int}.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # once the count is known, define the array as exactly n repetitions of expr
        n = t[0]
        arrayExpr << (Group(And([expr]*n)) if n else Group(empty))
        # returning an empty list drops the count token from the results
        return []
    if intExpr is None:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    else:
        # copy so the caller's expression is not given an extra parse action
        intExpr = intExpr.copy()
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
3286
3288 ret = []
3289 for i in L:
3290 if isinstance(i,list):
3291 ret.extend(_flatten(i))
3292 else:
3293 ret.append(i)
3294 return ret
3295
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater each time expr matches, from the tokens it produced
        if t:
            if len(t) == 1:
                rep << t[0]
            else:
                # flatten t tokens and require them in sequence, as literals
                tflat = _flatten(t.asList())
                rep << And(Literal(tt) for tt in tflat)
        else:
            rep << Empty()
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
3322
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with a copy of expr, then checks the tokens it got
    e2 = expr.copy()
    rep <<= e2
    def copyTokenToRepeater(s,l,t):
        # remember what expr matched, and make the repeater insist on the same tokens
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            theseTokens = _flatten(t.asList())
            if  theseTokens != matchTokens:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
3349
def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regex C{[]} character class."""
    # backslash-escape the characters that are special inside []: \ ^ - ]
    # (backslash is first in the loop so the escapes added later are not re-escaped)
    for c in r"\^-]":
        s = s.replace(c,_bslash+c)
    # spell newline and tab as their two-character escape sequences
    s = s.replace("\n",r"\n")
    s = s.replace("\t",r"\t")
    return _ustr(s)
3357
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       If C{strs} yields no symbols at all, a C{NoMatch} expression is returned.
    """
    # the Sequence ABC lives in collections.abc on Python 3 (and is gone from
    # collections itself as of 3.10); fall back for Python 2
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # drop duplicates, and move any symbol that would be masked by an earlier,
    # shorter prefix ahead of that prefix
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing removed or moved for this position - go on to the next one
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters - a character class is enough
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best effort only: fall through to the MatchFirst form below
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
3421
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    return Dict( ZeroOrMore( Group ( key + value ) ) )
3431
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. By default, returns a string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    # zero-width markers whose only token is the location at which they matched
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace the tokens in place and drop the two marker names,
            # leaving any other results names on t untouched
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
    matchExpr.setParseAction(extractText)
    return matchExpr
3455
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
       if all but one are non-empty.  The returned expression wraps C{expr}
       and replaces its tokens with the first of them."""
    return TokenConverter(expr).setParseAction(lambda t:t[0])
3460
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.
       This helper adds the following results names:
        - locn_start = location where matched expression begins
        - locn_end = location where matched expression ends
        - value = the actual parsed results

       Be careful if the input text contains C{<TAB>} characters, you may want to call
       C{L{ParserElement.parseWithTabs}}
    """
    # zero-width expression whose token is the location at which it matched
    locator = Empty().setParseAction(lambda s,l,t: l)
    # the end locator must not skip whitespace, or it would report a later position
    return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
3473
3474
3475
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")
3481
# grammar used by srange() to parse a regex-style "[...]" character set
# - a backslash followed by one punctuation character stands for that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# - \x## or \0x## hex escape; strip exactly the matched prefix before converting, so
#   that an all-zero value (\x00) and an uppercase X are both decoded correctly
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(re.sub(r"^\\0?[xX]","",t[0]),16)))
# - \0## octal escape
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
# - "a-z" style range, kept as a 2-token group so it can be told apart from a single char
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If C{s} cannot be parsed or expanded, an empty string is returned.
    """
    def _expanded(p):
        # single characters pass through; a (first, last) range group is expanded inclusively
        if not isinstance(p,ParseResults):
            return p
        return ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # deliberate best effort: any failure yields an empty character set
        return ""
3511
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.  The returned parse action raises C{ParseException}
       when the match location is not at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) != n:
            raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
3520
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{L{transformString<ParserElement.transformString>}()}.
       The returned parse action ignores its arguments and returns C{[replStr]}.
    """
    return lambda s,l,t: [replStr]
3526
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    # drop the first and last character of the first token
    return t[0][1:-1]
3533
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    return [ tt.upper() for tt in map(_ustr,t) ]
3537
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    return [ tt.lower() for tt in map(_ustr,t) ]
3541
3570
3574
3578
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Call C{withAttribute} with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.

       If positional name-value tuples are given, any keyword arguments are ignored.
    """
    if args:
        attrs = args[:]
    else:
        attrs = attrDict.items()
    # snapshot as a list of pairs, so the parse action can iterate it on every call
    attrs = [(k,v) for k,v in attrs]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# unique sentinel meaning "attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
3613
def withClass(classname, namespace=''):
    """Simplified version of C{L{withAttribute}} when matching on a div class - made
       difficult because C{class} is a reserved word in Python.
       If C{namespace} is given, the attribute tested is C{<namespace>:class}.
    """
    classattr = "%s:class" % namespace if namespace else "class"
    return withAttribute(**{classattr : classname})
3620
# associativity markers for infixNotation; each is a unique sentinel object
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
          expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Raises C{ValueError} if an operator definition has an invalid number of
       terms or associativity, or if a ternary operator is not given as a pair
       of expressions.
    """
    ret = Forward()
    # the innermost level: a base operand, or a parenthesized full expression
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            # validate before formatting the name, so a bad opExpr gives this error
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        # each matchExpr is guarded by a FollowedBy lookahead for one complete operation
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: operands are simply adjacent
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operation, or falls back to the previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# older name for infixNotation, kept as an alias
operatorPrecedence = infixNotation
3699
# quoted string expressions; inside the quotes a doubled quote character or a
# backslash escape is accepted, and the string may not span lines
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the C{ignoreExpr} argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
       The default is L{quotedString}, but if no expressions are to be ignored,
       then pass C{None} for this argument.

       Raises C{ValueError} if C{opener} equals C{closer}, or if no C{content}
       is given and C{opener}/C{closer} are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression: runs of characters that are not
        # whitespace and do not start an opener, closer or ignorable expression
        if isinstance(opener,basestring) and isinstance(closer,basestring):
            if len(opener) == 1 and len(closer)==1:
                # single-character delimiters can simply be excluded by CharsNotIn
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
                                ).setParseAction(lambda t:t[0].strip()))
            else:
                # multi-character delimiters need explicit negative lookaheads
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    ~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
        else:
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
    ret = Forward()
    # the recursive alternative (ret) is tried before plain content
    if ignoreExpr is not None:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    else:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content )  + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
3758
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that C{blockStatementExpr} is modified in place: a backslash followed
       by end-of-line is added to its ignore expressions.
    """
    def checkPeerIndent(s,l,t):
        # at end of input there is nothing left to compare
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # a deeper column opens a new indentation level
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        if l >= len(s): return
        curCol = col(l,s)
        # must be left of the current level and no deeper than the enclosing one
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more line ends, skipping only tabs and spaces between them
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # allow backslash line continuations inside statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
3810
# 8-bit character sets, expanded from hex ranges by srange()
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# expressions matching any HTML opening / closing tag
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character, paired up positionally
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # t.entity is the named group captured by commonHTMLEntity; unknown names give None
    return _htmlEntityMap.get(t.entity)
3820
3821
# expressions for common comment styles
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# a backslash immediately before the newline continues a // comment onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# one item of a comma-separated list: words that may be joined by embedded
# spaces/tabs, as long as the whitespace is not followed by a comma or end of line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# an item may be a quoted string or a bare item; a missing item yields ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
3835
3836
if __name__ == "__main__":
    # small self-demo: a grammar for "SELECT <columns> FROM <tables>", run over
    # a mix of matching and deliberately malformed statements

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    # dotted names are combined into one token and upper-cased
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) ).setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) ).setName("tables")
    simpleSQL = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    simpleSQL.runTests("""\
SELECT * from XYZZY, ABC
select * from SYS.XYZZY
Select A from Sys.dual
Select AA,BB,CC from Sys.dual
Select A, B, C from Sys.dual
Select A, B, C from Sys.dual
Xelect A, B, C from Sys.dual
Select A, B, C frox Sys.dual
Select
Select ^^^ frox Sys.dual
Select A, B, C from Sys.dual, Table2""")
3864