-- Token patterns (Lua string patterns, not regex). Each is later wrapped
-- with the shared Prefix/Suffix before being matched against the source.
local BRACKETS = "[%[%]]+" -- needs to be separate pattern from other operators
local IDEN = "[%a_][%w_]*"
local STRING_EMPTY = "(['\"])%1" -- Empty String
local STRING_PLAIN = "(['\"])[^\n]-([^\\]%1)" -- TODO: Handle escaping escapes
local STRING_INTER = "`[^\n]-`" -- Interpolated (backtick) string, single line
local STRING_INCOMP_A = "(['\"]).-\n" -- Incompleted String with next line
local STRING_INCOMP_B = "(['\"])[^\n]*" -- Incompleted String without next line
local STRING_MULTI = "%[(=*)%[.-%]%1%]" -- Multiline-String
@@ -72,6 +73,7 @@ local lua_matches = {
7273 { Prefix .. STRING_INCOMP_B .. Suffix , " string" },
7374 { Prefix .. STRING_MULTI .. Suffix , " string" },
7475 { Prefix .. STRING_MULTI_INCOMP .. Suffix , " string" },
76+ { Prefix .. STRING_INTER .. Suffix , " string_inter" },
7577
7678 -- Comments
7779 { Prefix .. COMMENT_MULTI .. Suffix , " comment" },
@@ -90,74 +92,138 @@ local lua_matches = {
9092 { " ^." , " iden" },
9193}
9294
95+ -- To reduce the amount of table indexing during lexing, we separate the matches now
96+ local PATTERNS , TOKENS = {}, {}
97+ for i , m in lua_matches do
98+ PATTERNS [i ] = m [1 ]
99+ TOKENS [i ] = m [2 ]
100+ end
101+
--- Create a plain token iterator from a string.
-- Yields (tokenType, content) pairs; tokenType is one of "keyword",
-- "builtin", "iden", "string", "comment", "number", "operator", etc.
-- Interpolated strings (backticks) are decomposed into "string" segments
-- plus recursively lexed tokens for the code inside each `{...}`.
-- @tparam string s a string.
-- @treturn function iterator returning (token, content); nil when done.

function lexer.scan(s: string)
	local index = 1
	local size = #s
	-- Last three pieces of matched content plus the last token type, used to
	-- recognize `lib.member` indexing of builtin libraries.
	local previousContent1, previousContent2, previousContent3, previousToken = "", "", "", ""

	local thread = coroutine.create(function()
		while index <= size do
			local matched = false
			for tokenType, pattern in ipairs(PATTERNS) do
				-- Find match (patterns are anchored via their shared Prefix)
				local start, finish = string.find(s, pattern, index)
				if start == nil then
					continue
				end

				-- Move head
				index = finish + 1
				matched = true

				-- Gather results
				local content = string.sub(s, start, finish)
				local rawToken = TOKENS[tokenType]
				local processedToken = rawToken

				-- Process token
				if rawToken == "var" then
					-- Since we merge spaces into the tok, we need to remove them
					-- in order to check the actual word it contains
					local cleanContent = string.gsub(content, Cleaner, "")

					if lua_keyword[cleanContent] then
						processedToken = "keyword"
					elseif lua_builtin[cleanContent] then
						processedToken = "builtin"
					elseif string.find(previousContent1, "%.[%s%c]*$") and previousToken ~= "comment" then
						-- The previous token ended with a '.', so this word is an
						-- index into whatever came before that dot.
						local parent = string.gsub(previousContent2, Cleaner, "")
						local lib = lua_libraries[parent]
						if lib and lib[cleanContent] and not string.find(previousContent3, "%.[%s%c]*$") then
							-- Indexing a builtin lib with existing item, treat as a builtin
							processedToken = "builtin"
						else
							-- Indexing a non builtin, can't be treated as a keyword/builtin
							processedToken = "iden"
						end
					else
						processedToken = "iden"
					end
				elseif rawToken == "string_inter" then
					-- NOTE(review): "[^\\]{" misses a brace escaped as "\\{";
					-- matches the existing escaping TODO above.
					if not string.find(content, "[^\\]{") then
						-- This inter string doesnt actually have any inters
						processedToken = "string"
					else
						-- We're gonna do our own yields, so the main loop won't need to.
						-- Our yields will be a mix of string and whatever is inside the inters.
						processedToken = nil

						local isString = true
						local subIndex = 1
						local subSize = #content
						while subIndex <= subSize do
							-- Find next unescaped brace (open or close)
							local subStart, subFinish = string.find(content, "^.-[^\\][{}]", subIndex)
							if subStart == nil then
								-- No more braces, the rest is all string
								coroutine.yield("string", string.sub(content, subIndex))
								break
							end

							if isString then
								-- Currently inside the string part; emit it
								-- including the '{' that opens the code section.
								subIndex = subFinish + 1
								coroutine.yield("string", string.sub(content, subStart, subFinish))

								-- This brace opens code
								isString = false
							else
								-- Currently inside code; lex it recursively and
								-- forward every inner token. The closing '}' is
								-- left in place (subIndex = subFinish) so the next
								-- string segment starts at it.
								subIndex = subFinish
								local subContent = string.sub(content, subStart, subFinish - 1)
								for innerToken, innerContent in lexer.scan(subContent) do
									coroutine.yield(innerToken, innerContent)
								end

								-- This brace opens string/closes code
								isString = true
							end
						end
					end
				end

				-- Record last 3 tokens for the indexing context check
				previousContent3 = previousContent2
				previousContent2 = previousContent1
				previousContent1 = content
				previousToken = processedToken or rawToken
				if processedToken then
					coroutine.yield(processedToken, content)
				end
				break
			end

			-- No pattern matched: stop rather than loop forever
			if not matched then
				return
			end
		end

		-- Completed the scan
		return
	end)

	return function()
		if coroutine.status(thread) == "dead" then
			return
		end

		local success, token, content = coroutine.resume(thread)
		if not success then
			-- FIX: previously a runtime error inside the lexer was swallowed
			-- and the token stream just ended; surface it to the caller.
			error(token, 2)
		end

		if token then
			return token, content
		end

		return
	end
end
163229
0 commit comments