# Symbols that are exactly one character long. character_is_part_of_word
# treats any character in this set as not being part of a word.
33lSingleCharacterSymbols = {"," , ":" , "(" , ")" , "'" , '"' , "+" , "&" , "-" , "*" , "/" , "<" , ">" , ";" , "=" , "[" , "]" , "?" }
# Symbols that are exactly two characters long.
44lTwoCharacterSymbols = {"=>" , "**" , ":=" , "/=" , ">=" , "<=" , "<>" , "??" , "?=" , "?<" , "?>" , "<<" , ">>" , "--" , "/*" , "*/" }
# Symbols that are exactly three characters long.
55lThreeCharacterSymbols = {"?/=" , "?<=" , "?>=" }
6- lFourCharacterSymbols = { " \\ ?= \\ " }
# Union of the two- and three-character symbol sets; this is the collection
# passed to build_symbol_prefix_tree to create dSymbolTree.
6+ lMultiCharacterSymbols = lTwoCharacterSymbols | lThreeCharacterSymbols
77
# NOTE(review): lStopChars is not referenced in the visible part of this file;
# confirm where it is consumed before changing it.
88lStopChars = {" " , "(" , ";" }
99
1010
def build_symbol_prefix_tree(lSymbols):
    """
    Build a prefix tree (trie) of nested dictionaries from a collection of symbols.

    Every character of a symbol selects a child dictionary.  The dictionary
    reached after the last character of a symbol stores the complete symbol
    under the reserved key "$", which marks the end of a branch.

    Parameters:

      lSymbols : (iterable of strings) symbols to insert into the tree

    Returns: (dictionary) the root of the prefix tree

    Raises:

      ValueError : if a symbol is empty or contains the reserved "$" character
    """
    dPrefixTree = {}
    for sSymbol in lSymbols:
        # "$" shares the key space with real characters, so a symbol containing
        # it would overwrite (or be overwritten by) an end-of-branch marker.
        # An empty symbol would leave a marker at the root that can never match.
        if not sSymbol or "$" in sSymbol:
            raise ValueError(f"Symbol {sSymbol!r} must be non-empty and must not contain '$'.")
        dNode = dPrefixTree
        for oChar in sSymbol:
            # Return the branch of the prefix tree for this character, or create an empty branch if there isn't one.
            dNode = dNode.setdefault(oChar, {})
        # Use $ as the marker for the end of a branch.
        dNode["$"] = sSymbol
    return dPrefixTree
21+
22+
# Prefix tree of all multi-character symbols, built when this statement runs
# and walked by combine_symbols_with_prefix_tree.
23+ dSymbolTree = build_symbol_prefix_tree (lMultiCharacterSymbols )
24+
25+
1126def create (sString ):
1227 """
1328 This function takes a string and returns a list of tokens.
@@ -17,8 +32,7 @@ def create(sString):
1732 oLine .combine_whitespace ()
1833 oLine .combine_string_literals ()
1934 oLine .combine_backslash_characters_into_symbols ()
20- oLine .combine_three_character_symbols ()
21- oLine .combine_two_character_symbols ()
35+ oLine .combine_symbols_with_prefix_tree ()
2236 oLine .combine_characters_into_words ()
2337 oLine .combine_character_literals ()
2438 oLine .split_natural_numbers ()
@@ -46,6 +60,30 @@ def combine_whitespace(self):
4660
4761 self .lChars = lReturn
4862
def combine_symbols_with_prefix_tree(self):
    """
    Combine consecutive tokens in self.lChars into multi-character symbols.

    Starting at each position, the tokens are followed through dSymbolTree as
    far as they match, and the longest complete symbol found is emitted as a
    single token.  When no symbol starts at the position, the token is copied
    unchanged.  The result replaces self.lChars.
    """
    lReturn = []
    iStart = 0
    iNumChars = len(self.lChars)
    while iStart < iNumChars:
        dNode = dSymbolTree
        iEnd = iStart
        oLastMatch = None
        iPrevEnd = iStart
        # Try to match as long a symbol as possible.
        while iEnd < iNumChars:
            sChar = self.lChars[iEnd]
            # "$" is the end-of-branch marker key, not a branch.  Following it
            # would turn dNode into the stored symbol string, and the next
            # lookup would then fail with a TypeError.
            if sChar == "$" or sChar not in dNode:
                break
            dNode = dNode[sChar]
            iEnd += 1
            if "$" in dNode:
                # Remember the longest complete symbol seen so far; a longer
                # candidate may still fail to complete.
                oLastMatch = dNode["$"]
                iPrevEnd = iEnd
        if oLastMatch is not None:
            lReturn.append(oLastMatch)
            iStart = iPrevEnd
        else:
            lReturn.append(self.lChars[iStart])
            iStart += 1
    self.lChars = lReturn
86+
4987 def combine_backslash_characters_into_symbols (self ):
5088 lReturn = []
5189 sSymbol = ""
@@ -61,34 +99,6 @@ def combine_backslash_characters_into_symbols(self):
6199 lReturn = add_trailing_string (lReturn , sSymbol )
62100 self .lChars = lReturn
63101
def combine_three_character_symbols(self):
    """
    Replace each run of tokens in self.lChars that spells a three-character
    symbol with a single token.

    At every position up to three consecutive tokens are joined.  If the joined
    text is in lThreeCharacterSymbols it is emitted as one token and those
    tokens are skipped; otherwise the current token is copied unchanged.
    """
    lTokens = self.lChars
    lCombined = []
    iIndex = 0
    while iIndex < len(lTokens):
        # Near the end of the list the slice simply holds fewer tokens.
        sCandidate = "".join(lTokens[iIndex : iIndex + 3])
        bIsSymbol = sCandidate in lThreeCharacterSymbols
        lCombined.append(sCandidate if bIsSymbol else lTokens[iIndex])
        iIndex += 3 if bIsSymbol else 1

    self.lChars = lCombined
77-
def combine_two_character_symbols(self):
    """
    Replace each pair of tokens in self.lChars that spells a two-character
    symbol with a single token.

    At every position up to two consecutive tokens are joined.  If the joined
    text is in lTwoCharacterSymbols it is emitted as one token and both tokens
    are skipped; otherwise the current token is copied unchanged.
    """
    lTokens = self.lChars
    lCombined = []
    iIndex = 0
    while iIndex < len(lTokens):
        # At the last position the slice holds a single token.
        sCandidate = "".join(lTokens[iIndex : iIndex + 2])
        bIsSymbol = sCandidate in lTwoCharacterSymbols
        lCombined.append(sCandidate if bIsSymbol else lTokens[iIndex])
        iIndex += 2 if bIsSymbol else 1

    self.lChars = lCombined
91-
92102 def combine_characters_into_words (self ):
93103 lReturn = []
94104 sWord = []
@@ -98,12 +108,12 @@ def combine_characters_into_words(self):
98108 sWord .append (sChar )
99109 else :
100110 if sWord :
101- lReturn .append ('' .join (sWord ))
111+ lReturn .append ("" .join (sWord ))
102112 sWord .clear ()
103113 lReturn .append (sChar )
104114
105115 if sWord :
106- lReturn .append ('' .join (sWord ))
116+ lReturn .append ("" .join (sWord ))
107117
108118 self .lChars = lReturn
109119
@@ -279,11 +289,7 @@ def add_trailing_string(lReturn, sString):
279289
280290
def character_is_part_of_word(sChar):
    """
    Report whether a token can be part of a word.

    Parameters:

      sChar : (string) the token to classify

    Returns: (boolean) True only when sChar is exactly one character long, is
    not whitespace and is not in lSingleCharacterSymbols.
    """
    # Tokens that are not exactly one character long are never word characters.
    if len(sChar) != 1:
        return False
    if sChar.isspace():
        return False
    return sChar not in lSingleCharacterSymbols
287293
288294
289295def find_indexes_of_double_quote_pairs (lTokens ):
0 commit comments