Skip to content

Commit 60ebeb8

Browse files
committed
Add parser for simple tag filter language
1 parent 7fb9bc7 commit 60ebeb8

File tree

4 files changed

+621
-0
lines changed

4 files changed

+621
-0
lines changed

lua/themepark/lexer.lua

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
-- ---------------------------------------------------------------------------
2+
--
3+
-- Osm2pgsql Themepark
4+
--
5+
-- A framework for pluggable osm2pgsql config files.
6+
--
7+
-- ---------------------------------------------------------------------------
8+
--
9+
-- lua/themepark/lexer.lua
10+
--
11+
-- ---------------------------------------------------------------------------
12+
--
13+
-- Copyright 2024 Jochen Topf <[email protected]>
14+
--
15+
-- Licensed under the Apache License, Version 2.0 (the "License");
16+
-- you may not use this file except in compliance with the License.
17+
-- You may obtain a copy of the License at
18+
--
19+
-- https://www.apache.org/licenses/LICENSE-2.0
20+
--
21+
-- Unless required by applicable law or agreed to in writing, software
22+
-- distributed under the License is distributed on an "AS IS" BASIS,
23+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24+
-- See the License for the specific language governing permissions and
25+
-- limitations under the License.
26+
--
27+
-- ---------------------------------------------------------------------------
28+
29+
-- Prototype for the symbols (tokens) produced by the lexer. It doubles as the
-- metatable of every symbol, so the metamethods below apply to all instances.
local Symbol = {}

-- Create a symbol.
--   stype  symbol type
--   sidx   start index, stored as `start_index`
--   eidx   end index, stored as `end_index`
--   value  optional payload (may be nil)
-- Returns the new symbol table with `self` as its metatable.
function Symbol:new(stype, sidx, eidx, value)
    local instance = setmetatable({
        type = stype,
        value = value,
        start_index = sidx,
        end_index = eidx,
    }, self)
    -- Field lookups that miss on the instance fall back to the prototype.
    self.__index = self
    return instance
end

-- Render as `type(start-end)`, or `type[value](start-end)` when the symbol
-- carries a truthy value.
function Symbol.__tostring(sym)
    local text = sym.type
    local payload = sym.value
    if payload then
        text = text .. '[' .. tostring(payload) .. ']'
    end
    return text .. '(' .. sym.start_index .. '-' .. sym.end_index .. ')'
end

-- Two symbols are equal when type, both indexes and value all agree.
function Symbol.__eq(lhs, rhs)
    if lhs.type ~= rhs.type then
        return false
    end
    if lhs.start_index ~= rhs.start_index then
        return false
    end
    if lhs.end_index ~= rhs.end_index then
        return false
    end
    return lhs.value == rhs.value
end
54+
55+
-- Build a matcher function for the lexer.
--   pattern       Lua pattern with two captures: the token text and the
--                 remaining input
--   extra_length  optional number of consumed characters that are not part of
--                 the first capture (defaults to 0)
--   stype         optional symbol type; when given, the captured text becomes
--                 the symbol's value, otherwise the captured text itself is
--                 used as the symbol type and the symbol has no value
-- The returned function takes the lexer state (`self.index`) and the input.
-- On a match it advances `self.index` and returns the symbol plus the
-- remaining input; otherwise it returns nothing.
local gen_matcher = function(pattern, extra_length, stype)
    local padding = extra_length or 0

    return function(self, input)
        local matched, remainder = input:match(pattern)
        if not matched then
            return
        end

        local first = self.index + 1
        local next_index = self.index + #matched + padding

        local symbol
        if stype then
            symbol = Symbol:new(stype, first, next_index + 1, matched)
        else
            symbol = Symbol:new(matched, first, next_index + 1)
        end

        self.index = next_index
        return symbol, remainder
    end
end
71+
72+
-- Lexer prototype: splits a filter expression into a list of symbols by
-- repeatedly trying the registered matcher functions on the remaining input.
local Lexer = {
    -- Matcher functions, tried in registration order by next(). This table
    -- lives on the prototype, so it is shared by all lexer instances.
    matchers = {},

    Symbol = Symbol,

    -- Create a lexer instance. `index` counts the characters of the input
    -- consumed so far.
    new = function(self)
        local new_object = { index = 0 }
        setmetatable(new_object, self)
        self.__index = self
        return new_object
    end,

    -- Register a matcher built by gen_matcher(); see there for the meaning
    -- of pattern, extra_length and stype.
    add_matcher = function(self, pattern, extra_length, stype)
        self.matchers[#self.matchers + 1] = gen_matcher(pattern, extra_length, stype)
    end,

    -- Strip leading space characters (only ' ', no tabs or newlines) from
    -- the input, advance the index accordingly and return the rest.
    remove_whitespace = function(self, input)
        -- Extra parentheses drop the substitution count returned by gsub.
        local output = (input:gsub('^ +', ''))
        self.index = self.index + (#input - #output)
        return output
    end,

    -- Try all matchers in order. Returns the first token found together
    -- with the remaining input, or nothing if no matcher applies.
    next = function(self, input)
        for _, mfunc in ipairs(self.matchers) do
            local token, rest = mfunc(self, input)
            if token then
                return token, rest
            end
        end
    end,

    -- Tokenize the whole input.
    --   input  the filter expression (string)
    -- Returns the list of tokens. Raises an error (without position prefix)
    -- if the input is not a string, or if some part of the input matches no
    -- registered matcher; in the latter case the message shows the
    -- expression with a caret under the offending position.
    run = function(self, input)
        if type(input) ~= 'string' then
            error("OSM filter expression must be a string, got " .. type(input), 0)
        end

        -- Start counting from the beginning on every run so that a reused
        -- lexer instance does not carry over positions from an earlier run.
        self.index = 0

        local original_input = input
        local tokens = {}

        while #input > 0 do
            input = self:remove_whitespace(input)
            if #input == 0 then
                break
            end

            local token, rest = self:next(input)
            if token then
                tokens[#tokens + 1] = token
                input = rest
            else
                error("error parsing OSM filter expression:\n"
                      .. original_input
                      .. "\n"
                      .. string.rep(' ', self.index)
                      .. '^', 0)
            end
        end

        return tokens
    end,

}
130+
131+
-- Register the token matchers. They are tried in exactly this order.
do
    local matcher_specs = {
        -- Quoted text: the quotes are consumed (extra_length = 2) but are
        -- not part of the symbol value.
        { pattern = [[^"([^"]*)"(.*)$]], extra_length = 2, stype = 'key' },
        { pattern = [[^'([^']*)'(.*)$]], extra_length = 2, stype = 'str' },
        -- Operators, keywords and punctuation: the matched text itself is
        -- used as the symbol type.
        { pattern = [[^(!?=)(.*)$]] },
        { pattern = [[^(and)(.*)$]] },
        { pattern = [[^(or)(.*)$]] },
        { pattern = [[^(not)(.*)$]] },
        { pattern = [[^(in)(.*)$]] },
        { pattern = [[^([()])(.*)$]] },
        { pattern = [[^(,)(.*)$]] },
        { pattern = [[^(true)(.*)$]] },
        { pattern = [[^(false)(.*)$]] },
    }

    for _, spec in ipairs(matcher_specs) do
        Lexer:add_matcher(spec.pattern, spec.extra_length, spec.stype)
    end
end
142+
143+
-- ---------------------------------------------------------------------------
144+
145+
-- Public interface of the lexer module.

-- Tokenize `input` with a fresh lexer instance and return the token list.
local function run(input)
    local instance = Lexer:new()
    return instance:run(input)
end

-- Construct a symbol directly from its type, start index, end index and
-- optional value.
local function symbol(stype, sidx, eidx, value)
    return Symbol:new(stype, sidx, eidx, value)
end

return {
    run = run,
    symbol = symbol,
}
154+
155+
-- ---------------------------------------------------------------------------

lua/themepark/parser.lua

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
-- ---------------------------------------------------------------------------
2+
--
3+
-- Osm2pgsql Themepark
4+
--
5+
-- A framework for pluggable osm2pgsql config files.
6+
--
7+
-- ---------------------------------------------------------------------------
8+
--
9+
-- lua/themepark/parser.lua
10+
--
11+
-- ---------------------------------------------------------------------------
12+
--
13+
-- Copyright 2024 Jochen Topf <[email protected]>
14+
--
15+
-- Licensed under the Apache License, Version 2.0 (the "License");
16+
-- you may not use this file except in compliance with the License.
17+
-- You may obtain a copy of the License at
18+
--
19+
-- https://www.apache.org/licenses/LICENSE-2.0
20+
--
21+
-- Unless required by applicable law or agreed to in writing, software
22+
-- distributed under the License is distributed on an "AS IS" BASIS,
23+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24+
-- See the License for the specific language governing permissions and
25+
-- limitations under the License.
26+
--
27+
-- ---------------------------------------------------------------------------
28+
29+
local lexer = require 'themepark/lexer'
30+
31+
-- Build a predicate that compares the value of `expr` against the values of
-- all expressions in `candidates`. It returns `result_if_found` as soon as one
-- of them is equal and `not result_if_found` if none is.
local function make_membership_test(expr, candidates, result_if_found)
    return function(tags)
        local value_left = expr(tags)
        for _, candidate in ipairs(candidates) do
            if value_left == candidate(tags) then
                return result_if_found
            end
        end
        return not result_if_found
    end
end

-- Recursive descent parser turning a filter expression into a Lua function
-- that takes a tags table and returns the result of the filter.
--
-- Grammar implemented by the parse_* methods:
--
--   expression := term { 'or' term }
--   term       := factor { 'and' factor }
--   factor     := 'not' factor | '(' expression ')' | condition
--   condition  := primary [ '=' primary | '!=' primary | ['not'] 'in' array ]
--   array      := '(' [ primary { ',' primary } [ ',' ] ] ')'
--   primary    := 'true' | 'false' | str | key
local Parser = {

    -- Create a parser instance with its own token list and read cursor.
    new = function(self)
        local new_object = { tokens = {}, pos = 1 }
        setmetatable(new_object, self)
        self.__index = self
        return new_object
    end,

    -- Return the next unconsumed token without consuming it, or nil if
    -- there is none.
    peek = function(self)
        return self.tokens[self.pos]
    end,

    -- Consume and return the next token, or return nil if there is none.
    -- Advancing a cursor instead of removing the first list element avoids
    -- shifting the whole token list for every token.
    get_next = function(self)
        local token = self.tokens[self.pos]
        if token ~= nil then
            self.pos = self.pos + 1
        end
        return token
    end,

    -- Consume the next token if it has type `stype`. Returns true if a
    -- token was consumed, false otherwise.
    match_next = function(self, stype)
        local next_token = self:peek()

        if next_token and next_token.type == stype then
            self:get_next()
            return true
        end

        return false
    end,

    -- Raise a parse error (without position prefix). If there is a next
    -- token the message shows the expression with a caret under the start
    -- of that token. This function never returns.
    failed = function(self, message)
        local token = self:peek()

        if token then
            error(message .. " at\n" .. self.expression .. "\n"
                    .. string.rep(' ', token.start_index - 1)
                    .. '^', 0)
        else
            error(message .. " at end of expression", 0)
        end
    end,

    -- primary: a boolean literal, a string literal (evaluates to its
    -- value) or a key (evaluates to the value of that tag, nil if the tag
    -- is not set).
    parse_primary = function(self)
        if self:match_next('true') then
            return function(tags) return true end
        end

        if self:match_next('false') then
            return function(tags) return false end
        end

        -- Remember the token before match_next() consumes it, its value is
        -- needed by the closures below.
        local token = self:peek()
        if self:match_next('str') then
            return function(tags) return token.value end
        end

        if self:match_next('key') then
            return function(tags) return tags[token.value] end
        end

        self:failed('invalid expression')
    end,

    -- array: a parenthesized, comma-separated list of primaries. The list
    -- may be empty and may end in a trailing comma. Returns a list of
    -- functions, one for each element.
    parse_array = function(self)
        if not self:match_next('(') then
            self:failed('expected opening paren')
        end

        local array = {}
        while true do
            local token = self:peek()
            if not token then
                break
            end

            if token.type == ')' then
                self:get_next()
                return array
            end

            array[#array + 1] = self:parse_primary()

            -- After an element only a comma or the closing paren (which is
            -- handled at the top of the loop) may follow.
            if not self:match_next(',') then
                local sep = self:peek()
                if not sep or sep.type ~= ')' then
                    break
                end
            end
        end

        self:failed('expected comma or closing paren')
    end,

    -- condition: a primary, optionally compared to another primary or
    -- tested for membership in an array. The returned function always
    -- yields a boolean.
    parse_condition = function(self)
        local expr = self:parse_primary()

        if self:match_next('=') then
            local next_expr = self:parse_primary()
            return function(tags) return expr(tags) == next_expr(tags) end
        end

        if self:match_next('!=') then
            local next_expr = self:parse_primary()
            return function(tags)
                -- A nil value on the left (for instance a missing tag)
                -- never satisfies '!='.
                local value = expr(tags)
                return value ~= nil and value ~= next_expr(tags)
            end
        end

        if self:match_next('in') then
            return make_membership_test(expr, self:parse_array(), true)
        end

        if self:match_next('not') then
            if not self:match_next('in') then
                self:failed('expected "in" after "not"')
            end
            return make_membership_test(expr, self:parse_array(), false)
        end

        -- A primary on its own: convert its value to a boolean.
        return function(tags) return not(not(expr(tags))) end
    end,

    -- factor: a negation, a parenthesized expression or a condition.
    -- Sub-expressions are returned as they are instead of being wrapped in
    -- a pass-through closure, which would only add one more function call
    -- to every evaluation of the filter.
    parse_factor = function(self)
        if self:match_next('not') then
            local next_expr = self:parse_factor()
            return function(tags) return not next_expr(tags) end
        end

        if self:match_next('(') then
            local next_expr = self:parse_expression()
            if not self:match_next(')') then
                self:failed('expected closing paren')
            end
            return next_expr
        end

        return self:parse_condition()
    end,

    -- term: one or more factors joined by 'and'.
    parse_term = function(self)
        local expr = self:parse_factor()

        while self:match_next('and') do
            -- Capture the expression built so far in a fresh local so the
            -- new closure does not refer to itself.
            local this_expr = expr
            local next_expr = self:parse_factor()
            expr = function(tags) return this_expr(tags) and next_expr(tags) end
        end

        return expr
    end,

    -- expression: one or more terms joined by 'or'.
    parse_expression = function(self)
        local expr = self:parse_term()

        while self:match_next('or') do
            local this_expr = expr
            local next_expr = self:parse_term()
            expr = function(tags) return this_expr(tags) or next_expr(tags) end
        end

        return expr
    end,

    -- Parse a complete filter expression.
    --   expression  the filter expression
    -- Returns a function taking a tags table and returning the filter
    -- result. Raises an error if the expression can not be tokenized or
    -- parsed, or if tokens are left over after a complete expression.
    parse = function(self, expression)
        self.expression = expression
        self.tokens = lexer.run(expression)
        self.pos = 1
        local expr = self:parse_expression()

        if self:peek() then
            self:failed('invalid expression')
        end

        return expr
    end,

}
225+
226+
-- Public interface of the parser module.

-- Parse the filter expression `input` with a fresh parser instance and return
-- the resulting function.
local function parse(input)
    local instance = Parser:new()
    return instance:parse(input)
end

return {
    parse = parse,
}
231+
232+
-- ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)