Skip to content

Commit 240ba87

Browse files
committed
Optimized tokenizer.lua
`getPosition` heavily degraded performance because it relied on the `gsub` function. This change reduced the parsing time of a 27 KB file from 4.35 seconds to just 0.1 seconds. `getPosition` also behaved incorrectly, as it returned the wrong line position.
1 parent 5990884 commit 240ba87

File tree

1 file changed

+30
-6
lines changed

1 file changed

+30
-6
lines changed

src/prometheus/tokenizer.lua

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,8 @@ Tokenizer.EOF_TOKEN = {
4444
source = "<EOF>",
4545
}
4646

47-
local function getPosition(source, i)
48-
return source:sub(1, i):gsub("[^\n]", ""):len() + 1, i - source:sub(1, i):gsub("[^\r]", ""):len() + 1;
49-
end
50-
5147
local function token(self, startPos, kind, value)
52-
local line, linePos = getPosition(self.source, self.index);
48+
local line, linePos = self:getPosition(self.index);
5349
local annotations = self.annotations
5450
self.annotations = {};
5551
return {
@@ -65,14 +61,40 @@ local function token(self, startPos, kind, value)
6561
end
6662

6763
-- Builds the text of a lexing error for the tokenizer `self`.
-- The position is looked up for the tokenizer's current `self.index` and the
-- result has the form "Lexing Error at Position <line>:<linePos>, <message>".
-- The string is only returned here; raising it is left to the caller.
local function generateError(self, message)
68-
local line, linePos = getPosition(self.source, self.index);
64+
local line, linePos = self:getPosition(self.index);
6965
return "Lexing Error at Position " .. tostring(line) .. ":" .. tostring(linePos) .. ", " .. message;
7066
end
7167

7268
-- Builds the text of a warning for an already created token.
-- Uses the `line` and `linePos` fields stored on `token` and returns
-- "Warning at Position <line>:<linePos>, <message>".
local function generateWarning(token, message)
7369
return "Warning at Position " .. tostring(token.line) .. ":" .. tostring(token.linePos) .. ", " .. message;
7470
end
7571

72+
-- Returns two values for the character index `i`: the id of the line record
-- stored in `self.columnMap[i]` (line records are numbered from 1) and the
-- column that record's `charMap` holds for `i`.
-- This is a pure table lookup; the map is filled by Tokenizer:prepareGetPosition.
-- NOTE(review): prepareGetPosition only fills indices 1..self.length, so for any
-- other `i` the lookup below yields nil and `column.id` raises an error —
-- confirm callers never pass such an index.
function Tokenizer:getPosition(i)
73+
local column = self.columnMap[i]
74+
return column.id, column.charMap[i]
75+
end
76+
77+
--// Prepare columnMap for getPosition
-- Walks `self.source` once (indices 1..self.length) and stores, for every
-- character index, the record of the line that character belongs to.
-- A line record has the shape
--   { id = <line number, starting at 1>,
--     length = <characters counted on this line so far>,
--     charMap = { [index] = <column on this line, starting at 1> } }
-- The result replaces `self.columnMap`; the map is rebuilt from scratch on every
-- call, so the cost of one call is linear in `self.length`.
function Tokenizer:prepareGetPosition()
	local source = self.source
	local columnMap = {}
	local column = { charMap = {}, id = 1, length = 0 }

	for index = 1, self.length do
		local columnLength = column.length + 1
		column.length = columnLength
		column.charMap[index] = columnLength

		-- Register the index on the line it was just counted for BEFORE a
		-- possible line switch. A "\n" therefore belongs to the line it
		-- terminates. Registering it on the following line instead would point
		-- the index at a record whose charMap has no entry for it, and
		-- getPosition would return a nil column for that index.
		columnMap[index] = column

		if string.sub(source, index, index) == "\n" then
			-- Start a fresh record for the next line.
			-- NOTE: one table pair is allocated per line; revisit only if the
			-- allocations show up in profiles.
			column = { charMap = {}, id = column.id + 1, length = 0 }
		end
	end

	self.columnMap = columnMap
end
97+
7698
-- Constructor for Tokenizer
7799
function Tokenizer:new(settings)
78100
local luaVersion = (settings and (settings.luaVersion or settings.LuaVersion)) or LuaVersion.LuaU;
@@ -132,12 +154,14 @@ function Tokenizer:reset()
132154
self.length = 0;
133155
self.source = "";
134156
self.annotations = {};
157+
self.columnMap = {};
135158
end
136159

137160
-- Append String to this Tokenizer
-- Concatenates `code` onto `self.source`, adds its length to `self.length`,
-- and then calls prepareGetPosition so position lookups cover the new text.
-- Note: prepareGetPosition loops over 1..self.length, i.e. the whole
-- accumulated source is rescanned on every append.
138161
function Tokenizer:append(code)
139162
self.source = self.source .. code
140163
self.length = self.length + code:len();
164+
self:prepareGetPosition();
141165
end
142166

143167
-- Function to peek the n'th char in the source of the tokenizer

0 commit comments

Comments
 (0)