Commit c3fe9a0

Defines lr1 generator
1 parent 91c2e68 commit c3fe9a0

3 files changed: +474 -94 lines changed


src/syntactes/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 from .token import Token
 from .rule import Rule
 from .grammar import Grammar
-from .generator import LR0Generator, SLRGenerator
+from .generator import LR0Generator, SLRGenerator, LR1Generator
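
With this change, LR1Generator is exported from the package root next to the existing generators. A minimal import sketch, based only on the names visible in this diff:

from syntactes import Grammar, Rule, Token, LR1Generator  # LR1Generator is the export added above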

src/syntactes/generator.py

Lines changed: 217 additions & 88 deletions
@@ -1,72 +1,49 @@
+from abc import ABC, abstractmethod
+from typing import Optional, Type
+
 from syntactes import Grammar, Token
 from syntactes._action import Action
-from syntactes._item import LR0Item
-from syntactes._state import LR0State
-from syntactes.parsing_table import Entry, LR0ParsingTable, SLRParsingTable
-
-
-class LR0Generator:
-    """
-    Generator of LR0 parsing tables.
-    """
+from syntactes._item import Item, LR0Item, LR1Item
+from syntactes._state import LR0State, LR1State, State
+from syntactes.parsing_table import (
+    Entry,
+    LR0ParsingTable,
+    LR1ParsingTable,
+    ParsingTable,
+    SLRParsingTable,
+)
+
+
+class Generator(ABC):
+    table_cls: Type[ParsingTable]
+    state_cls: Type[State]
+    item_cls: Type[Item]

     def __init__(self, grammar: Grammar) -> None:
         self.grammar = grammar

-    def generate(self) -> LR0ParsingTable:
+    @abstractmethod
+    def closure(self, items: set[Item]) -> set[Item]:
+        raise NotImplementedError()
+
+    @abstractmethod
+    def goto(self, items: set[Item], token: Token) -> set[Item]:
+        raise NotImplementedError()
+
+    def generate(self) -> ParsingTable:
         """
-        Generates an LR0 parsing table for the configured grammar.
+        Generates a parsing table for the configured grammar.
         """
         states, shift_entries = self._create_states_and_shift_entries()
         reduce_entries = self._create_reduce_entries(states)

         entries = shift_entries | reduce_entries

-        table = LR0ParsingTable.from_entries(entries, self.grammar)
+        table = self.table_cls.from_entries(entries, self.grammar)

         return table

-    def closure(self, items: set[LR0Item]) -> set[LR0Item]:
-        """
-        Computes and returns the closure for the given set of items.
-
-        The closure operation adds more items to a set of items when there
-        is a dot to the left of a non-terminal symbol.
-
-        e.g.
-        for any item S -> . E in the given items, closure adds E -> . T
-        and T -> . x, where E -> T and T -> x are production rules.
-        """
-        _set = {item for item in items}
-
-        for item in items:
-            if item.dot_is_last():
-                continue
-
-            new_items = self._get_related_items(item.after_dot)
-            _set |= new_items
-
-        return _set
-
-    def goto(self, items: set[LR0Item], token: Token) -> set[LR0Item]:
-        """
-        Computes and returns the GOTO set for the given set of items.
-
-        The goto operation creates a set where all items have the dot past the
-        given symbol.
-        """
-        _set: set[LR0Item] = set()
-
-        for item in items:
-            if item.dot_is_last() or item.after_dot != token:
-                continue
-
-            next_item = LR0Item(item.rule, item.position + 1)
-            _set.add(next_item)
-
-        return self.closure(_set)
-
-    def get_states(self) -> set[LR0State]:
+    def get_states(self) -> set[State]:
         """
         Returns the set of automaton states for the configured grammar.
         """
@@ -148,33 +125,14 @@ def _follow(self, symbol: Token) -> set[Token]:

         return _set

-    def _get_related_items(self, symbol: Token) -> set[LR0Item]:
-        """
-        e.g. the items X -> .g, Y -> .p would be returned for the below grammar rules:
-        1. X -> g
-        2. X -> Y
-        3. Y -> p
-        where 'g' and 'p' are terminals.
-        """
-        _set: set[LR0Item] = set()
-
-        for rule in self.grammar.rules:
-            if rule.lhs == symbol:
-                _set.add(LR0Item(rule, 0))
-
-                if rule.rhs_len == 1 and not rule.rhs[0].is_terminal:
-                    _set |= self._get_related_items(rule.rhs[0])
-
-        return _set
-
-    def _create_states_and_shift_entries(self) -> tuple[set[LR0State], set[Entry]]:
+    def _create_states_and_shift_entries(self) -> tuple[set[State], set[Entry]]:
         """
         Computes and returns the states and entries for shift actions.
         """
         states, entries = dict(), set()

-        initial_items = self.closure({LR0Item(self.grammar.starting_rule, 0)})
-        initial_state = LR0State.from_items(initial_items)
+        initial_items = self._create_initial_items()
+        initial_state = self.state_cls.from_items(initial_items)
         initial_state.set_number(1)
         states[initial_state] = 1

@@ -187,8 +145,8 @@ def _create_states_and_shift_entries(self) -> tuple[set[LR0State], set[Entry]]:
         return set(states.keys()), entries

     def _extend_states_and_shift_entries(
-        self, states: dict[LR0State, int], entries: set[Entry]
-    ) -> tuple[dict[LR0State, int], set[Entry]]:
+        self, states: dict[State, int], entries: set[Entry]
+    ) -> tuple[dict[State, int], set[Entry]]:
         """
         Extends states and entries following the below algorithm:

@@ -218,7 +176,7 @@ def _extend_states_and_shift_entries(
             if len(new_items) == 0:
                 continue

-            new = LR0State.from_items(new_items)
+            new = self.state_cls.from_items(new_items)

             number = _states.setdefault(new, len(_states) + 1)
             new.set_number(number)
@@ -227,6 +185,90 @@ def _extend_states_and_shift_entries(

         return _states, _entries

+    @abstractmethod
+    def _create_initial_items(self) -> set[Item]:
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _create_reduce_entries(self, states: set[State]) -> set[Entry]:
+        raise NotImplementedError()
+
+
+class LR0Generator(Generator):
+    """
+    Generator of LR0 parsing tables.
+    """
+
+    table_cls = LR0ParsingTable
+    state_cls = LR0State
+    item_cls = LR0Item
+
+    def closure(self, items: set[LR0Item]) -> set[LR0Item]:
+        """
+        Computes and returns the closure for the given set of items.
+
+        The closure operation adds more items to a set of items when there
+        is a dot to the left of a non-terminal symbol.
+
+        e.g.
+        for any item S -> . E in the given items, closure adds E -> . T
+        and T -> . x, where E -> T and T -> x are production rules.
+        """
+        _set = {item for item in items}
+        __set = set()
+
+        while __set != _set:
+            __set = {i for i in _set}
+
+            for item in items:
+                if item.dot_is_last():
+                    continue
+
+                new_items = self._get_related_items(item.after_dot)
+                _set |= new_items
+
+        return _set
+
+    def goto(self, items: set[LR0Item], token: Token) -> set[LR0Item]:
+        """
+        Computes and returns the GOTO set for the given set of items.
+
+        The goto operation creates a set where all items have the dot past the
+        given symbol.
+        """
+        _set: set[LR0Item] = set()
+
+        for item in items:
+            if item.dot_is_last() or item.after_dot != token:
+                continue
+
+            next_item = LR0Item(item.rule, item.position + 1)
+            _set.add(next_item)
+
+        return self.closure(_set)
+
+    def _get_related_items(self, symbol: Token) -> set[LR0Item]:
+        """
+        e.g. the items X -> .g, Y -> .p would be returned for the below grammar rules:
+        1. X -> g
+        2. X -> Y
+        3. Y -> p
+        where 'g' and 'p' are terminals.
+        """
+        _set: set[LR0Item] = set()
+
+        for rule in self.grammar.rules:
+            if rule.lhs == symbol:
+                _set.add(LR0Item(rule, 0))
+
+                if rule.rhs_len == 1 and not rule.rhs[0].is_terminal:
+                    _set |= self._get_related_items(rule.rhs[0])
+
+        return _set
+
+    def _create_initial_items(self) -> set[LR0Item]:
+        return self.closure({LR0Item(self.grammar.starting_rule, 0)})
+
     def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
         """
         Computes and returns the entries for reduce actions and the accept action.
@@ -249,20 +291,103 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:


 class SLRGenerator(LR0Generator):
-    def generate(self) -> SLRParsingTable:
+    table_cls = SLRParsingTable
+
+    def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
         """
-        Generates an SLR parsing table for the configured grammar.
+        Computes and returns the entries for reduce actions and the accept action.
         """
-        states, shift_entries = self._create_states_and_shift_entries()
-        reduce_entries = self._create_reduce_entries(states)
+        entries: set[Entry] = set()

-        entries = shift_entries | reduce_entries
+        for state in states:
+            for item in state.items:
+                if item.after_dot == Token.eof():
+                    entries.add(Entry(state, Token.eof(), Action.accept()))

-        table = SLRParsingTable.from_entries(entries, self.grammar)
+                if not item.dot_is_last():
+                    continue

-        return table
+                for token in self._follow(item.rule.lhs):
+                    entries.add(Entry(state, token, Action.reduce(item.rule)))

-    def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
+        return entries
+
+
+class LR1Generator(Generator):
+    table_cls = LR1ParsingTable
+    state_cls = LR1State
+    item_cls = LR1Item
+
+    def closure(self, items: set[LR1Item]) -> set[LR1Item]:
+        """
+        Computes and returns the closure for the given set of items.
+
+        The closure operation adds more items to a set of items when there
+        is a dot to the left of a non-terminal symbol.
+        """
+        _set = {item for item in items}
+        __set = set()
+
+        while __set != _set:
+            __set = {i for i in _set}
+
+            for item in __set:
+                if item.dot_is_last():
+                    continue
+
+                if item.position + 1 < item.rule.rhs_len:
+                    next_symbol = item.rule.rhs[item.position + 1]
+                else:
+                    next_symbol = None
+
+                new_items = self._get_related_items(
+                    item.after_dot, next_symbol, item.lookahead_token
+                )
+                _set |= new_items
+
+        return _set
+
+    def goto(self, items: set[LR1Item], token: Token) -> set[LR1Item]:
+        """
+        Computes and returns the GOTO set for the given set of items.
+
+        The goto operation creates a set where all items have the dot past the
+        given symbol.
+        """
+        _set: set[LR1Item] = set()
+
+        for item in items:
+            if item.dot_is_last() or item.after_dot != token:
+                continue
+
+            next_item = LR1Item(item.rule, item.position + 1, item.lookahead_token)
+            _set.add(next_item)
+
+        return self.closure(_set)
+
+    def _get_related_items(
+        self, symbol: Token, next_symbol: Optional[Token], lookahead_token: Token
+    ) -> set[LR1Item]:
+        _set: set[LR1Item] = set()
+
+        if next_symbol is None:
+            lookaheads = (lookahead_token,)
+        else:
+            lookaheads = (next_symbol, lookahead_token)
+
+        for rule in self.grammar.rules:
+            if rule.lhs != symbol:
+                continue
+
+            for s in self._first(*lookaheads):
+                _set.add(LR1Item(rule, 0, s))
+
+                if rule.rhs_len == 1 and not rule.rhs[0].is_terminal:
+                    _set |= self._get_related_items(rule.rhs[0], None, s)
+
+        return _set
+
+    def _create_reduce_entries(self, states: set[LR1State]) -> set[Entry]:
         """
         Computes and returns the entries for reduce actions and the accept action.
         """
@@ -276,7 +401,11 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
                 if not item.dot_is_last():
                     continue

-                for token in self._follow(item.rule.lhs):
-                    entries.add(Entry(state, token, Action.reduce(item.rule)))
+                entries.add(
+                    Entry(state, item.lookahead_token, Action.reduce(item.rule))
+                )

         return entries
+
+    def _create_initial_items(self) -> set[LR1Item]:
+        return self.closure({LR1Item(self.grammar.starting_rule, 0, Token.eof())})
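
For orientation, a minimal usage sketch of the new generator follows. Only Token.eof(), the LR1Generator(grammar) constructor, and generate() are confirmed by this diff; the Token, Rule, and Grammar constructor calls below are assumptions about the rest of the library's API and may differ from it.

# Sketch only: the Token/Rule/Grammar signatures are assumed, not shown in this commit.
from syntactes import Grammar, LR1Generator, Rule, Token

EOF = Token.eof()                  # end-of-input marker; the generator uses it for the accept entry
S = Token("S", is_terminal=False)  # assumed Token(name, is_terminal) signature
E = Token("E", is_terminal=False)
x = Token("x", is_terminal=True)

# Toy grammar: S -> E $,  E -> x E,  E -> x
rules = [
    Rule(0, S, E, EOF),            # assumed Rule(number, lhs, *rhs) shape
    Rule(1, E, x, E),
    Rule(2, E, x),
]
grammar = Grammar(rules[0], rules, {S, E, x, EOF})  # assumed Grammar(starting_rule, rules, tokens) shape

# Confirmed by the diff: generate() builds states via closure/goto and returns an LR1ParsingTable.
table = LR1Generator(grammar).generate()
print(table)

Compared to SLRGenerator, which adds a reduce entry for every token in FOLLOW(lhs) of a completed item, LR1Generator adds a reduce entry only for the item's own lookahead token, which is what the per-item lookahead carried by LR1Item buys.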
