Skip to content

Commit a2fcb6c

Browse files
committed
Add additional functionality to tokenizer
1 parent 2621337 commit a2fcb6c

File tree

6 files changed

+50
-12
lines changed

6 files changed

+50
-12
lines changed

examples/01_inline_parsing.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ start: mov 10 r0
1717
add r0 r1
1818
jmp start
1919
]]
20-
local tokenizer = LuASM.string_tokenizer(src)
20+
local tokenizer = LuASM:string_tokenizer(src)
2121

2222
-- 4. Parse
2323
local result = asm:parse(tokenizer)

examples/02_file_parsing.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ local instructions = {
1212
local asm = LuASM:new(instructions, {})
1313

1414
-- 3. Tokenize a source string
15-
local tokenizer = LuASM.file_tokenizer("./data/02_data.lasm")
15+
local tokenizer = LuASM:file_tokenizer("./data/02_data.lasm")
1616

1717
-- 4. Parse
1818
local result = asm:parse(tokenizer)

examples/03_custom_arguments.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ local src = [[
2020
mov reg0, reg1
2121
print "Hello"
2222
]]
23-
local tokenizer = LuASM.string_tokenizer(src)
23+
local tokenizer = LuASM:string_tokenizer(src)
2424

2525
-- 4. Parse
2626
local result = asm:parse(tokenizer)

examples/05_comments.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ start: mov 10 r0 # This is a comment
1717
add r0 r1 ; This is another comment
1818
jmp start
1919
]]
20-
local tokenizer = LuASM.string_tokenizer(src)
20+
local tokenizer = LuASM:string_tokenizer(src)
2121

2222
-- 4. Parse
2323
local result = asm:parse(tokenizer)

examples/06_custom_comments.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ start: mov 10 r0 = My custom comment
1919
add r0 r1 = This just works
2020
jmp start
2121
]]
22-
local tokenizer = LuASM.string_tokenizer(src)
22+
local tokenizer = LuASM:string_tokenizer(src)
2323

2424
-- 4. Parse
2525
local result = asm:parse(tokenizer)

src/luasm.lua

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,23 @@ function Tokenizer.get_next_line()
109109
return false
110110
end
111111

112+
--- @return boolean
113+
function Tokenizer:has_next_line()
114+
return false
115+
end
116+
117+
--- @return string|nil
118+
function Tokenizer:get_label()
119+
return nil
120+
end
121+
112122
--- Creates a new tokenizer without a specific implementation.
113123
--- @return table A tokenizer instance (needs a concrete `get_next_line` implementation).
114-
function Tokenizer:new()
124+
function Tokenizer:new(luasm)
115125
local obj = {}
116126

127+
obj.luasm = luasm
128+
117129
setmetatable(obj, self)
118130
self.__index = self
119131

@@ -123,13 +135,13 @@ end
123135
--- Reads in a file and returns a tokenizer for that file.
124136
--- @param path string Path to the file to read.
125137
--- @return table|nil Tokenizer instance or `nil` if the file cannot be opened.
126-
function LuASM.file_tokenizer(path)
138+
function LuASM:file_tokenizer(path)
127139
local file = io.open(path, "r")
128140
if file == nil then
129141
return nil
130142
end
131143

132-
local tokenizer = LuASM.string_tokenizer(file:read("*a"))
144+
local tokenizer = self:string_tokenizer(file:read("*a"))
133145

134146
file:close()
135147

@@ -139,13 +151,15 @@ end
139151
--- Reads in an ASM source string and returns a tokenizer for that string.
140152
--- @param input string The complete ASM source as a string.
141153
--- @return table Tokenizer instance.
142-
function LuASM.string_tokenizer(input)
154+
function LuASM:string_tokenizer(input)
143155
local tokenizer = Tokenizer:new()
144156

145157
tokenizer.input = input
146158
tokenizer.cursor = 1 -- byte index inside `input`
147159
tokenizer.current_line = 1 -- line counter (1‑based)
148160

161+
tokenizer.line = nil
162+
149163
-- Concrete implementation of `get_next_line` for a string source.
150164
tokenizer.get_next_line = function()
151165
if #tokenizer.input <= tokenizer.cursor then
@@ -156,12 +170,38 @@ function LuASM.string_tokenizer(input)
156170

157171
local line = trim(string.sub(tokenizer.input, tokenizer.cursor, endIndex))
158172

173+
-- Remove comment from the line
174+
if self.settings.comment ~= nil then
175+
line = line:gsub(self.settings.comment, "")
176+
end
177+
159178
tokenizer.cursor = endIndex + 1
160179
tokenizer.current_line = tokenizer.current_line + 1
161180

162181
return line
163182
end
164183

184+
tokenizer.has_line = function()
185+
tokenizer.line = tokenizer.get_next_line()
186+
187+
return tokenizer.line ~= nil
188+
end
189+
190+
tokenizer.get_label = function()
191+
if self.settings.label == nil then
192+
return nil
193+
end
194+
195+
local label, rest = tokenizer.line:match(self.settings.label)
196+
197+
if label ~= nil then
198+
tokenizer.line = rest
199+
tokenizer.cursor = tokenizer.cursor + #label
200+
end
201+
202+
return label
203+
end
204+
165205
return tokenizer
166206
end
167207

@@ -222,9 +262,7 @@ function LuASM:parse(tokenizer)
222262
parsed_lines = 0
223263
}
224264

225-
while tokenizer:has_next_line() do
226-
tokenizer:goto_next_line() -- Maybe there should be an error if not everything was parsed
227-
265+
while tokenizer:has_line() do
228266
parse_data.parsed_lines = parse_data.parsed_lines + 1
229267

230268
local label = tokenizer:get_label()

0 commit comments

Comments
 (0)