Skip to content

Commit 30aef7b

Browse files
committed
Support string interpolation syntax
1 parent e6c5e44 commit 30aef7b

File tree

3 files changed

+120
-54
lines changed

3 files changed

+120
-54
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@boatbomber/highlighter",
3-
"version": "0.6.2",
3+
"version": "0.7.0",
44
"license": "MIT",
55
"repository": {
66
"type": "git",

src/lexer/init.lua

Lines changed: 118 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ local BRACKETS = "[%[%]]+" -- needs to be separate pattern from other operators
3838
local IDEN = "[%a_][%w_]*"
3939
local STRING_EMPTY = "(['\"])%1" --Empty String
4040
local STRING_PLAIN = "(['\"])[^\n]-([^\\]%1)" --TODO: Handle escaping escapes
41+
local STRING_INTER = "`[^\n]-`"
4142
local STRING_INCOMP_A = "(['\"]).-\n" --Incomplete string continuing onto the next line
4243
local STRING_INCOMP_B = "(['\"])[^\n]*" --Incomplete string without a next line
4344
local STRING_MULTI = "%[(=*)%[.-%]%1%]" --Multiline-String
@@ -72,6 +73,7 @@ local lua_matches = {
7273
{ Prefix .. STRING_INCOMP_B .. Suffix, "string" },
7374
{ Prefix .. STRING_MULTI .. Suffix, "string" },
7475
{ Prefix .. STRING_MULTI_INCOMP .. Suffix, "string" },
76+
{ Prefix .. STRING_INTER .. Suffix, "string_inter" },
7577

7678
-- Comments
7779
{ Prefix .. COMMENT_MULTI .. Suffix, "comment" },
@@ -90,74 +92,138 @@ local lua_matches = {
9092
{ "^.", "iden" },
9193
}
9294

95+
-- To reduce the amount of table indexing during lexing, we separate the matches now
96+
local PATTERNS, TOKENS = {}, {}
97+
for i, m in lua_matches do
98+
PATTERNS[i] = m[1]
99+
TOKENS[i] = m[2]
100+
end
101+
93102
--- Create a plain token iterator from a string.
94103
-- @tparam string s a string.
95104

96105
function lexer.scan(s: string)
97-
-- local startTime = os.clock()
98-
lexer.finished = false
99-
100106
local index = 1
101-
local sz = #s
102-
local p1, p2, p3, pT = "", "", "", ""
103-
104-
return function()
105-
if index <= sz then
106-
for _, m in ipairs(lua_matches) do
107-
local i1, i2 = string.find(s, m[1], index)
108-
if i1 then
109-
local tok = string.sub(s, i1, i2)
110-
index = i2 + 1
111-
lexer.finished = index > sz
112-
--if lexer.finished then
113-
-- print((os.clock()-startTime)*1000, "ms")
114-
--end
115-
116-
local t = m[2]
117-
local t2 = t
118-
119-
-- Process t into t2
120-
if t == "var" then
121-
-- Since we merge spaces into the tok, we need to remove them
122-
-- in order to check the actual word it contains
123-
local cleanTok = string.gsub(tok, Cleaner, "")
124-
125-
if lua_keyword[cleanTok] then
126-
t2 = "keyword"
127-
elseif lua_builtin[cleanTok] then
128-
t2 = "builtin"
107+
local size = #s
108+
local previousContent1, previousContent2, previousContent3, previousToken = "", "", "", ""
109+
110+
local thread = coroutine.create(function()
111+
while index <= size do
112+
local matched = false
113+
for tokenType, pattern in ipairs(PATTERNS) do
114+
-- Find match
115+
local start, finish = string.find(s, pattern, index)
116+
if start == nil then continue end
117+
118+
-- Move head
119+
index = finish + 1
120+
matched = true
121+
122+
-- Gather results
123+
local content = string.sub(s, start, finish)
124+
local rawToken = TOKENS[tokenType]
125+
local processedToken = rawToken
126+
127+
-- Process token
128+
if rawToken == "var" then
129+
-- Since we merge spaces into the tok, we need to remove them
130+
-- in order to check the actual word it contains
131+
local cleanContent = string.gsub(content, Cleaner, "")
132+
133+
if lua_keyword[cleanContent] then
134+
processedToken = "keyword"
135+
elseif lua_builtin[cleanContent] then
136+
processedToken = "builtin"
137+
elseif string.find(previousContent1, "%.[%s%c]*$") and previousToken ~= "comment" then
138+
-- The previous was a . so we need to special case indexing things
139+
local parent = string.gsub(previousContent2, Cleaner, "")
140+
local lib = lua_libraries[parent]
141+
if lib and lib[cleanContent] and not string.find(previousContent3, "%.[%s%c]*$") then
142+
-- Indexing a builtin lib with existing item, treat as a builtin
143+
processedToken = "builtin"
129144
else
130-
t2 = "iden"
145+
-- Indexing a non builtin, can't be treated as a keyword/builtin
146+
processedToken = "iden"
131147
end
148+
-- print("indexing",parent,"with",cleanTok,"as",t2)
149+
else
150+
processedToken = "iden"
151+
end
152+
elseif rawToken == "string_inter" then
153+
if not string.find(content, "[^\\]{") then
154+
-- This interpolated string doesn't actually contain any interpolations
155+
processedToken = "string"
156+
else
157+
-- We're gonna do our own yields, so the main loop won't need to
158+
-- Our yields will be a mix of string and whatever is inside the inters
159+
processedToken = nil
160+
161+
local isString = true
162+
local subIndex = 1
163+
local subSize = #content
164+
while subIndex <= subSize do
165+
-- Find next brace
166+
local subStart, subFinish = string.find(content, "^.-[^\\][{}]", subIndex)
167+
if subStart == nil then
168+
-- No more braces, all string
169+
coroutine.yield("string", string.sub(content, subIndex))
170+
break
171+
end
172+
173+
if isString then
174+
-- We are currently a string
175+
subIndex = subFinish + 1
176+
coroutine.yield("string", string.sub(content, subStart, subFinish))
132177

133-
if string.find(p1, "%.[%s%c]*$") and pT ~= "comment" then
134-
-- The previous was a . so we need to special case indexing things
135-
local parent = string.gsub(p2, Cleaner, "")
136-
local lib = lua_libraries[parent]
137-
if lib and lib[cleanTok] and not string.find(p3, "%.[%s%c]*$") then
138-
-- Indexing a builtin lib with existing item, treat as a builtin
139-
t2 = "builtin"
178+
-- This brace opens code
179+
isString = false
140180
else
141-
-- Indexing a non builtin, can't be treated as a keyword/builtin
142-
t2 = "iden"
181+
-- We are currently in code
182+
subIndex = subFinish
183+
local subContent = string.sub(content, subStart, subFinish-1)
184+
for innerToken, innerContent in lexer.scan(subContent) do
185+
coroutine.yield(innerToken, innerContent)
186+
end
187+
188+
-- This brace opens string/closes code
189+
isString = true
143190
end
144-
-- print("indexing",parent,"with",cleanTok,"as",t2)
145191
end
146192
end
193+
end
147194

148-
-- Record last 3 tokens for the indexing context check
149-
p3 = p2
150-
p2 = p1
151-
p1 = tok
152-
pT = t2
153-
return t2, tok
195+
-- Record last 3 tokens for the indexing context check
196+
previousContent3 = previousContent2
197+
previousContent2 = previousContent1
198+
previousContent1 = content
199+
previousToken = processedToken or rawToken
200+
if processedToken then
201+
coroutine.yield(processedToken, content)
154202
end
203+
break
204+
end
205+
206+
-- No matches found
207+
if not matched then
208+
return
155209
end
156-
-- No matches
157-
return nil
158210
end
159-
-- Reached end
160-
return nil
211+
212+
-- Completed the scan
213+
return
214+
end)
215+
216+
return function()
217+
if coroutine.status(thread) == "dead" then
218+
return
219+
end
220+
221+
local success, token, content = coroutine.resume(thread)
222+
if success and token then
223+
return token, content
224+
end
225+
226+
return
161227
end
162228
end
163229

wally.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "boatbomber/highlighter"
33
description = "RichText highlighting Lua code with a pure Lua lexer"
4-
version = "0.6.2"
4+
version = "0.7.0"
55
license = "MIT"
66
authors = ["boatbomber (https://boatbomber.com)"]
77
registry = "https://github.com/upliftgames/wally-index"

0 commit comments

Comments
 (0)