Skip to content

Commit ddc4569

Browse files
committed
Defines LR1 generator
1 parent 51787bd commit ddc4569

File tree

3 files changed

+495
-69
lines changed

3 files changed

+495
-69
lines changed

src/syntactes/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from .token import Token
22
from .rule import Rule
33
from .grammar import Grammar
4-
from .generator import LR0Generator, SLRGenerator
4+
from .generator import LR0Generator, SLRGenerator, LR1Generator

src/syntactes/generator.py

Lines changed: 238 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,49 @@
1+
from abc import ABC, abstractmethod
2+
from typing import Optional, Type
3+
14
from syntactes import Grammar, Token
25
from syntactes._action import Action
3-
from syntactes._item import LR0Item
4-
from syntactes._state import LR0State
5-
from syntactes.parsing_table import Entry, LR0ParsingTable, SLRParsingTable
6-
7-
8-
class LR0Generator:
9-
"""
10-
Generator of LR0 parsing tables.
11-
"""
6+
from syntactes._item import Item, LR0Item, LR1Item
7+
from syntactes._state import LR0State, LR1State, State
8+
from syntactes.parsing_table import (
9+
Entry,
10+
LR0ParsingTable,
11+
LR1ParsingTable,
12+
ParsingTable,
13+
SLRParsingTable,
14+
)
15+
16+
17+
class Generator(ABC):
18+
table_cls: Type[ParsingTable]
19+
state_cls: Type[State]
20+
item_cls: Type[Item]
1221

1322
def __init__(self, grammar: Grammar) -> None:
    """Initialize the generator with the grammar to build a parsing table for."""
    self.grammar = grammar
1524

16-
def generate(self) -> LR0ParsingTable:
25+
@abstractmethod
def closure(self, items: set[Item]) -> set[Item]:
    """Computes and returns the closure of the given set of items."""
    raise NotImplementedError()
28+
29+
@abstractmethod
def goto(self, items: set[Item], token: Token) -> set[Item]:
    """Computes and returns the GOTO set of the given items over `token`."""
    raise NotImplementedError()
32+
33+
def generate(self) -> ParsingTable:
    """
    Generates a parsing table for the configured grammar.

    Builds the automaton states together with the shift entries, then the
    reduce/accept entries, and assembles all entries into the table class
    configured by the concrete generator (`table_cls`).
    """
    states, shift_entries = self._create_states_and_shift_entries()
    reduce_entries = self._create_reduce_entries(states)

    entries = shift_entries | reduce_entries

    # The concrete subclass decides the table type (LR0 / SLR / LR1).
    table = self.table_cls.from_entries(entries, self.grammar)

    return table
2845

29-
def closure(self, items: set[LR0Item]) -> set[LR0Item]:
30-
"""
31-
Computes and returns the closure for the given set of items.
32-
33-
The closure operation adds more items to a set of items when there
34-
is a dot to the left of a non-terminal symbol.
35-
36-
e.g.
37-
for any item S -> . E in the given items, closure adds E -> . T
38-
and T -> . x, where E -> T and T -> x are production rules.
39-
"""
40-
_set = {item for item in items}
41-
42-
for item in items:
43-
if item.dot_is_last():
44-
continue
45-
46-
new_items = self._get_related_items(item.after_dot)
47-
_set |= new_items
48-
49-
return _set
50-
51-
def goto(self, items: set[LR0Item], token: Token) -> set[LR0Item]:
52-
"""
53-
Computes and returns the GOTO set for the given set of items.
54-
55-
The goto operation creates a set where all items have the dot past the
56-
given symbol.
57-
"""
58-
_set: set[LR0Item] = set()
59-
60-
for item in items:
61-
if item.dot_is_last() or item.after_dot != token:
62-
continue
63-
64-
next_item = LR0Item(item.rule, item.position + 1)
65-
_set.add(next_item)
66-
67-
return self.closure(_set)
68-
69-
def get_states(self) -> set[LR0State]:
46+
def get_states(self) -> set[State]:
7047
"""
7148
Returns the set of automaton states for the configured grammar.
7249
"""
@@ -148,6 +125,68 @@ def _follow(self, symbol: Token) -> set[Token]:
148125

149126
return _set
150127

128+
@abstractmethod
def _create_states_and_shift_entries(self) -> tuple[set[State], set[Entry]]:
    """Computes and returns the automaton states and the shift entries."""
    # Annotation fixed: implementations return a *set* of entries
    # (see the LR1 implementation and the unpacking in `generate`).
    raise NotImplementedError()
131+
132+
@abstractmethod
def _create_reduce_entries(self, states: set[State]) -> set[Entry]:
    """Computes and returns the reduce and accept entries for the given states."""
    raise NotImplementedError()
135+
136+
137+
class LR0Generator(Generator):
138+
"""
139+
Generator of LR0 parsing tables.
140+
"""
141+
142+
table_cls = LR0ParsingTable
143+
state_cls = LR0State
144+
item_cls = LR0Item
145+
146+
def closure(self, items: "set[LR0Item]") -> "set[LR0Item]":
    """
    Computes and returns the closure for the given set of items.

    The closure operation adds more items to a set of items when there
    is a dot to the left of a non-terminal symbol.

    e.g.
    for any item S -> . E in the given items, closure adds E -> . T
    and T -> . x, where E -> T and T -> x are production rules.

    Iterates to a fixed point so that items added by the closure are
    themselves expanded (adding E -> . T must in turn add T -> . x).
    """
    closure_set = set(items)
    snapshot: set = set()

    while snapshot != closure_set:
        snapshot = set(closure_set)

        # BUGFIX: iterate over the snapshot of the growing set, not the
        # original `items` argument — otherwise items discovered in a
        # previous pass are never expanded and the closure stays
        # incomplete (the LR1 variant already does this correctly).
        for item in snapshot:
            if item.dot_is_last():
                continue

            closure_set |= self._get_related_items(item.after_dot)

    return closure_set
171+
172+
def goto(self, items: set[LR0Item], token: Token) -> set[LR0Item]:
    """
    Computes and returns the GOTO set for the given set of items.

    GOTO collects every item whose dot stands directly before `token`,
    advances that dot one position past it, and returns the closure of
    the resulting set.
    """
    advanced = {
        LR0Item(item.rule, item.position + 1)
        for item in items
        if not item.dot_is_last() and item.after_dot == token
    }
    return self.closure(advanced)
189+
151190
def _get_related_items(self, symbol: Token) -> set[LR0Item]:
152191
"""
153192
e.g. the items X -> .g, Y -> .p would be returned for the below grammar rules:
@@ -249,20 +288,155 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
249288

250289

251290
class SLRGenerator(LR0Generator):
    """
    Generator of SLR parsing tables.

    Reuses the LR0 automaton construction; only the reduce-entry
    construction differs, restricting reductions to FOLLOW(lhs).
    """

    table_cls = SLRParsingTable

    def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
        """
        Computes and returns the entries for reduce actions and the accept action.
        """
        eof = Token.eof()
        entries: set[Entry] = set()

        for state in states:
            for item in state.items:
                # An item with the dot before EOF yields the accept action.
                if item.after_dot == eof:
                    entries.add(Entry(state, eof, Action.accept()))

                # Only completed items (dot at the end) produce reductions.
                if not item.dot_is_last():
                    continue

                # SLR refinement over LR0: reduce only on FOLLOW(lhs).
                for follow_token in self._follow(item.rule.lhs):
                    entries.add(Entry(state, follow_token, Action.reduce(item.rule)))

        return entries
311+
312+
313+
class LR1Generator(Generator):
314+
table_cls = LR1ParsingTable
315+
state_cls = LR1State
316+
item_cls = LR1Item
317+
318+
def closure(self, items: set[LR1Item]) -> set[LR1Item]:
    """
    Computes and returns the LR1 closure of the given set of items.

    Repeats until a fixed point is reached: for every item whose dot is
    not at the end, the related items for the symbol after the dot are
    added, with lookaheads derived from the symbol that follows it (or
    from the item's own lookahead when the dot precedes the last symbol).
    """
    _set = {item for item in items}
    __set = set()

    # Iterate until no new items are added (fixed point).
    while __set != _set:
        __set = {i for i in _set}

        for item in __set:
            if item.dot_is_last():
                continue

            # Symbol following the dotted symbol, used for lookahead
            # derivation; None when the dot is before the last symbol.
            if item.position + 1 < item.rule.rhs_len:
                next_symbol = item.rule.rhs[item.position + 1]
            else:
                next_symbol = None

            new_items = self._get_related_items(
                item.after_dot, next_symbol, item.lookahead_token
            )
            _set |= new_items

    return _set
341+
342+
def goto(self, items: set[LR1Item], token: Token) -> set[LR1Item]:
    """
    Computes and returns the GOTO set for the given set of items.

    Advances the dot past `token` for every item where the dot stands
    directly before it (preserving the item's lookahead), and returns
    the closure of the resulting set.
    """
    _set: set[LR1Item] = set()

    for item in items:
        if item.dot_is_last() or item.after_dot != token:
            continue

        # Same rule and lookahead, dot moved one position to the right.
        next_item = LR1Item(item.rule, item.position + 1, item.lookahead_token)
        _set.add(next_item)

    return self.closure(_set)
354+
355+
def _get_related_items(
    self, symbol: Token, next_symbol: Optional[Token], lookahead_token: Token
) -> set[LR1Item]:
    """
    Returns the initial (dot at position 0) LR1 items for every rule
    producing `symbol`, one per lookahead in FIRST(next_symbol,
    lookahead_token) — or FIRST(lookahead_token) when `next_symbol`
    is None.
    """
    _set: set[LR1Item] = set()

    # When there is no symbol after the dotted one, the lookahead of the
    # parent item propagates; otherwise FIRST is taken over both.
    if next_symbol is None:
        lookaheads = (lookahead_token,)
    else:
        lookaheads = (next_symbol, lookahead_token)

    for rule in self.grammar.rules:
        if rule.lhs != symbol:
            continue

        for s in self._first(*lookaheads):
            _set.add(LR1Item(rule, 0, s))

            # Unit rule with a non-terminal rhs: recurse so the lookahead
            # propagates through the chain.
            # NOTE(review): there is no visited-set here — cyclic unit
            # rules (A -> B, B -> A) would recurse without bound; confirm
            # the grammar excludes such cycles.
            if rule.rhs_len == 1 and not rule.rhs[0].is_terminal:
                _set |= self._get_related_items(rule.rhs[0], None, s)

    return _set
376+
377+
def _create_states_and_shift_entries(self) -> tuple[set[LR1State], set[Entry]]:
    """
    Computes and returns the states and entries for shift actions.

    Seeds the automaton with the closure of the starting item (dot at 0,
    EOF lookahead), then repeatedly extends the state/entry sets until a
    fixed point is reached.
    """
    start_items = self.closure(
        {LR1Item(self.grammar.starting_rule, 0, Token.eof())}
    )
    start_state = LR1State.from_items(start_items)
    start_state.set_number(1)

    numbered: dict[LR1State, int] = {start_state: 1}
    shift_entries: set[Entry] = set()

    # Fixed-point iteration: stop when an extension pass changes nothing.
    snapshot: tuple = (dict(), set())
    while snapshot != (numbered, shift_entries):
        snapshot = (dict(numbered), set(shift_entries))
        numbered, shift_entries = self._extend_states_and_shift_entries(*snapshot)

    return set(numbered), shift_entries
397+
398+
def _extend_states_and_shift_entries(
    self, states: dict[LR1State, int], entries: set[Entry]
) -> tuple[dict[LR1State, int], set[Entry]]:
    """
    Performs one extension pass and returns the (possibly larger)
    numbered states and shift entries, following the below algorithm:

    ```
    for each state S in states
        for each item A -> a.Xb in S
            J = goto(S, X)
            states.add(J)
            entries.add((S->J, X))
    ```

    Intended to be called repeatedly until a fixed point is reached
    (see `_create_states_and_shift_entries`). Also mutates the given
    states: a state containing an item with the dot before EOF is
    marked final via `set_final()`.
    """
    # Work on copies so the caller can compare old vs. new sets for the
    # fixed-point test.
    _states = {s: n for s, n in states.items()}
    _entries = {e for e in entries}

    EOF = Token.eof()
    for state in states:
        for item in state.items:
            if item.dot_is_last():
                continue

            # Dot before EOF: mark the state final, emit no shift entry.
            if item.after_dot == EOF:
                state.set_final()
                continue

            new_items = self.goto(state.items, item.after_dot)

            if len(new_items) == 0:
                continue

            new = LR1State.from_items(new_items)

            # Reuse the number of an equal existing state; otherwise
            # assign the next free number.
            number = _states.setdefault(new, len(_states) + 1)
            new.set_number(number)

            _entries.add(Entry(state, item.after_dot, Action.shift(new)))

    return _states, _entries
438+
439+
def _create_reduce_entries(self, states: set[LR1State]) -> set[Entry]:
266440
"""
267441
Computes and returns the entries for reduce actions and the accept action.
268442
"""
@@ -276,7 +450,8 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
276450
if not item.dot_is_last():
277451
continue
278452

279-
for token in self._follow(item.rule.lhs):
280-
entries.add(Entry(state, token, Action.reduce(item.rule)))
453+
entries.add(
454+
Entry(state, item.lookahead_token, Action.reduce(item.rule))
455+
)
281456

282457
return entries

0 commit comments

Comments
 (0)