1+ from abc import ABC , abstractmethod
2+ from typing import Optional , Type
3+
14from syntactes import Grammar , Token
25from syntactes ._action import Action
3- from syntactes ._item import LR0Item
4- from syntactes ._state import LR0State
5- from syntactes .parsing_table import Entry , LR0ParsingTable , SLRParsingTable
6-
7-
8- class LR0Generator :
9- """
10- Generator of LR0 parsing tables.
11- """
6+ from syntactes ._item import Item , LR0Item , LR1Item
7+ from syntactes ._state import LR0State , LR1State , State
8+ from syntactes .parsing_table import (
9+ Entry ,
10+ LR0ParsingTable ,
11+ LR1ParsingTable ,
12+ ParsingTable ,
13+ SLRParsingTable ,
14+ )
15+
16+
17+ class Generator (ABC ):
18+ table_cls : Type [ParsingTable ]
19+ state_cls : Type [State ]
20+ item_cls : Type [Item ]
1221
def __init__(self, grammar: Grammar) -> None:
    """
    Stores the grammar that the generator operates on.
    """
    self.grammar = grammar
1524
16- def generate (self ) -> LR0ParsingTable :
@abstractmethod
def closure(self, items: set[Item]) -> set[Item]:
    """
    Computes and returns the closure for the given set of items.

    Abstract: concrete generators provide the implementation.
    """
    raise NotImplementedError
28+
@abstractmethod
def goto(self, items: set[Item], token: Token) -> set[Item]:
    """
    Computes and returns the GOTO set for the given set of items and token.

    Abstract: concrete generators provide the implementation.
    """
    raise NotImplementedError
32+
def generate(self) -> ParsingTable:
    """
    Generates a parsing table for the configured grammar.

    The shift entries (created together with the states) and the reduce
    entries are merged and handed to `table_cls.from_entries` along with
    the grammar.
    """
    states, shift_entries = self._create_states_and_shift_entries()
    all_entries = shift_entries | self._create_reduce_entries(states)

    return self.table_cls.from_entries(all_entries, self.grammar)
2845
29- def closure (self , items : set [LR0Item ]) -> set [LR0Item ]:
30- """
31- Computes and returns the closure for the given set of items.
32-
33- The closure operation adds more items to a set of items when there
34- is a dot to the left of a non-terminal symbol.
35-
36- e.g.
37- for any item S -> . E in the given items, closure adds E -> . T
38- and T -> . x, where E -> T and T -> x are production rules.
39- """
40- _set = {item for item in items }
41-
42- for item in items :
43- if item .dot_is_last ():
44- continue
45-
46- new_items = self ._get_related_items (item .after_dot )
47- _set |= new_items
48-
49- return _set
50-
51- def goto (self , items : set [LR0Item ], token : Token ) -> set [LR0Item ]:
52- """
53- Computes and returns the GOTO set for the given set of items.
54-
55- The goto operation creates a set where all items have the dot past the
56- given symbol.
57- """
58- _set : set [LR0Item ] = set ()
59-
60- for item in items :
61- if item .dot_is_last () or item .after_dot != token :
62- continue
63-
64- next_item = LR0Item (item .rule , item .position + 1 )
65- _set .add (next_item )
66-
67- return self .closure (_set )
68-
69- def get_states (self ) -> set [LR0State ]:
46+ def get_states (self ) -> set [State ]:
7047 """
7148 Returns the set of automaton states for the configured grammar.
7249 """
@@ -148,33 +125,14 @@ def _follow(self, symbol: Token) -> set[Token]:
148125
149126 return _set
150127
151- def _get_related_items (self , symbol : Token ) -> set [LR0Item ]:
152- """
153- e.g. the items X -> .g, Y -> .p would be returned for the below grammar rules:
154- 1. X -> g
155- 2. X -> Y
156- 3. Y -> p
157- where 'g' and 'p' are terminals.
158- """
159- _set : set [LR0Item ] = set ()
160-
161- for rule in self .grammar .rules :
162- if rule .lhs == symbol :
163- _set .add (LR0Item (rule , 0 ))
164-
165- if rule .rhs_len == 1 and not rule .rhs [0 ].is_terminal :
166- _set |= self ._get_related_items (rule .rhs [0 ])
167-
168- return _set
169-
170- def _create_states_and_shift_entries (self ) -> tuple [set [LR0State ], set [Entry ]]:
128+ def _create_states_and_shift_entries (self ) -> tuple [set [State ], set [Entry ]]:
171129 """
172130 Computes and returns the states and entries for shift actions.
173131 """
174132 states , entries = dict (), set ()
175133
176- initial_items = self .closure ({ LR0Item ( self . grammar . starting_rule , 0 )} )
177- initial_state = LR0State .from_items (initial_items )
134+ initial_items = self ._create_initial_items ( )
135+ initial_state = self . state_cls .from_items (initial_items )
178136 initial_state .set_number (1 )
179137 states [initial_state ] = 1
180138
@@ -187,8 +145,8 @@ def _create_states_and_shift_entries(self) -> tuple[set[LR0State], set[Entry]]:
187145 return set (states .keys ()), entries
188146
189147 def _extend_states_and_shift_entries (
190- self , states : dict [LR0State , int ], entries : set [Entry ]
191- ) -> tuple [dict [LR0State , int ], set [Entry ]]:
148+ self , states : dict [State , int ], entries : set [Entry ]
149+ ) -> tuple [dict [State , int ], set [Entry ]]:
192150 """
193151 Extends states and entries following the below algorithm:
194152
@@ -218,7 +176,7 @@ def _extend_states_and_shift_entries(
218176 if len (new_items ) == 0 :
219177 continue
220178
221- new = LR0State .from_items (new_items )
179+ new = self . state_cls .from_items (new_items )
222180
223181 number = _states .setdefault (new , len (_states ) + 1 )
224182 new .set_number (number )
@@ -227,6 +185,90 @@ def _extend_states_and_shift_entries(
227185
228186 return _states , _entries
229187
@abstractmethod
def _create_initial_items(self) -> set[Item]:
    """
    Returns the items from which the initial state is built.

    Abstract: concrete generators provide the implementation.
    """
    raise NotImplementedError
191+
@abstractmethod
def _create_reduce_entries(self, states: set[State]) -> set[Entry]:
    """
    Computes and returns the entries for reduce actions and the accept action.

    Abstract: concrete generators provide the implementation.
    """
    raise NotImplementedError
195+
196+
197+ class LR0Generator (Generator ):
198+ """
199+ Generator of LR0 parsing tables.
200+ """
201+
202+ table_cls = LR0ParsingTable
203+ state_cls = LR0State
204+ item_cls = LR0Item
205+
def closure(self, items: set[LR0Item]) -> set[LR0Item]:
    """
    Computes and returns the closure for the given set of items.

    The closure operation adds more items to a set of items when there
    is a dot to the left of a non-terminal symbol. Items added along the
    way are expanded too, so the result is a fix-point: expanding any
    item of the returned set yields nothing new.

    e.g.
    for any item S -> . E in the given items, closure adds E -> . T
    and T -> . x, where E -> T and T -> x are production rules.

    The given set is left untouched; a new set is returned.
    """
    closed = set(items)
    # Work-list of items whose symbol after the dot has not been expanded
    # yet. Newly discovered items are queued as well; expanding only the
    # original input would miss everything reachable through them.
    pending = list(closed)

    while pending:
        item = pending.pop()
        if item.dot_is_last():
            continue

        for related in self._get_related_items(item.after_dot):
            if related not in closed:
                closed.add(related)
                pending.append(related)

    return closed
231+
def goto(self, items: set[LR0Item], token: Token) -> set[LR0Item]:
    """
    Computes and returns the GOTO set for the given set of items.

    The goto operation creates a set where all items have the dot past the
    given symbol. Items whose dot is already last, or whose symbol after
    the dot differs from the given token, do not contribute. The closure
    of the advanced items is returned.
    """
    # Build the advanced items through the configured item class, in the
    # same way states and tables are built through `state_cls`/`table_cls`.
    advanced = {
        self.item_cls(item.rule, item.position + 1)
        for item in items
        if not (item.dot_is_last() or item.after_dot != token)
    }

    return self.closure(advanced)
249+
def _get_related_items(self, symbol: Token) -> set[LR0Item]:
    """
    Returns the items with the dot at position 0 for every rule whose
    left-hand side is the given symbol, following rules whose right-hand
    side is a single non-terminal.

    e.g. the items X -> .g, X -> .Y, Y -> .p would be returned for the
    below grammar rules when called with X:
        1. X -> g
        2. X -> Y
        3. Y -> p
    where 'g' and 'p' are terminals.

    Each symbol is expanded at most once, so cyclic unit rules
    (e.g. A -> B together with B -> A) terminate instead of recursing
    without bound.
    """
    related: set[LR0Item] = set()
    visited = set()
    pending = [symbol]

    while pending:
        current = pending.pop()
        if current in visited:
            continue
        visited.add(current)

        for rule in self.grammar.rules:
            if rule.lhs != current:
                continue

            related.add(self.item_cls(rule, 0))

            # A rule of the form X -> Y (single non-terminal) pulls in the
            # items of Y as well.
            if rule.rhs_len == 1 and not rule.rhs[0].is_terminal:
                pending.append(rule.rhs[0])

    return related
268+
def _create_initial_items(self) -> set[LR0Item]:
    """
    Returns the closure of the item for the grammar's starting rule with
    the dot at position 0.
    """
    # Use the configured item class rather than a hard-coded one, like the
    # state and table classes.
    start_item = self.item_cls(self.grammar.starting_rule, 0)
    return self.closure({start_item})
271+
230272 def _create_reduce_entries (self , states : set [LR0State ]) -> set [Entry ]:
231273 """
232274 Computes and returns the entries for reduce actions and the accept action.
@@ -249,20 +291,103 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
249291
250292
class SLRGenerator(LR0Generator):
    """
    Generator of SLR parsing tables.
    """

    table_cls = SLRParsingTable

    def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
        """
        Computes and returns the entries for reduce actions and the accept action.

        An accept entry on end-of-input is added for every item whose symbol
        after the dot is the end-of-input token. For every item with the dot
        in the last position, a reduce entry is added for each token that
        `_follow` returns for the left-hand side of the item's rule.
        """
        result: set[Entry] = set()

        for state in states:
            for item in state.items:
                if item.after_dot == Token.eof():
                    result.add(Entry(state, Token.eof(), Action.accept()))

                if item.dot_is_last():
                    result.update(
                        Entry(state, token, Action.reduce(item.rule))
                        for token in self._follow(item.rule.lhs)
                    )

        return result
314+
315+
316+ class LR1Generator (Generator ):
317+ table_cls = LR1ParsingTable
318+ state_cls = LR1State
319+ item_cls = LR1Item
320+
def closure(self, items: set[LR1Item]) -> set[LR1Item]:
    """
    Computes and returns the closure for the given set of items.

    The closure operation adds more items to a set of items when there
    is a dot to the left of a non-terminal symbol. Passes over the set are
    repeated until a pass adds nothing new.

    The symbol that follows the one after the dot (or None when there is
    no such symbol) and the item's lookahead token are forwarded to
    `_get_related_items`.
    """
    closed = set(items)
    previous = set()

    while previous != closed:
        # Iterate over a snapshot so the set can grow during the pass.
        previous = set(closed)

        for item in previous:
            if item.dot_is_last():
                continue

            following = item.position + 1
            next_symbol = (
                item.rule.rhs[following] if following < item.rule.rhs_len else None
            )

            closed |= self._get_related_items(
                item.after_dot, next_symbol, item.lookahead_token
            )

    return closed
349+
def goto(self, items: set[LR1Item], token: Token) -> set[LR1Item]:
    """
    Computes and returns the GOTO set for the given set of items.

    The goto operation creates a set where all items have the dot past the
    given symbol; each advanced item keeps the lookahead token of the item
    it was derived from. Items whose dot is already last, or whose symbol
    after the dot differs from the given token, do not contribute. The
    closure of the advanced items is returned.
    """
    # Build the advanced items through the configured item class, in the
    # same way states and tables are built through `state_cls`/`table_cls`.
    advanced = {
        self.item_cls(item.rule, item.position + 1, item.lookahead_token)
        for item in items
        if not (item.dot_is_last() or item.after_dot != token)
    }

    return self.closure(advanced)
367+
def _get_related_items(
    self, symbol: Token, next_symbol: Optional[Token], lookahead_token: Token
) -> set[LR1Item]:
    """
    Returns the items with the dot at position 0 for every rule whose
    left-hand side is the given symbol.

    One item is created per token returned by `_first`, which is called
    with (next_symbol, lookahead_token), or with (lookahead_token,) alone
    when next_symbol is None. When a matching rule's right-hand side is a
    single non-terminal, that non-terminal is expanded as well, with each
    produced token as its lookahead and no next symbol.

    Each (symbol, lookaheads) combination is expanded at most once, so
    cyclic unit rules (e.g. A -> B together with B -> A) terminate instead
    of recursing without bound.
    """
    related: set[LR1Item] = set()

    if next_symbol is None:
        initial = (lookahead_token,)
    else:
        initial = (next_symbol, lookahead_token)

    visited = set()
    pending = [(symbol, initial)]

    while pending:
        state = pending.pop()
        if state in visited:
            continue
        visited.add(state)
        current, lookaheads = state

        for rule in self.grammar.rules:
            if rule.lhs != current:
                continue

            for s in self._first(*lookaheads):
                related.add(self.item_cls(rule, 0, s))

                # A rule of the form X -> Y (single non-terminal) pulls in
                # the items of Y with `s` as the lookahead.
                if rule.rhs_len == 1 and not rule.rhs[0].is_terminal:
                    pending.append((rule.rhs[0], (s,)))

    return related
389+
390+ def _create_reduce_entries (self , states : set [LR1State ]) -> set [Entry ]:
266391 """
267392 Computes and returns the entries for reduce actions and the accept action.
268393 """
@@ -276,7 +401,11 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
276401 if not item .dot_is_last ():
277402 continue
278403
279- for token in self ._follow (item .rule .lhs ):
280- entries .add (Entry (state , token , Action .reduce (item .rule )))
404+ entries .add (
405+ Entry (state , item .lookahead_token , Action .reduce (item .rule ))
406+ )
281407
282408 return entries
409+
def _create_initial_items(self) -> set[LR1Item]:
    """
    Returns the closure of the item for the grammar's starting rule with
    the dot at position 0 and the end-of-input token as lookahead.
    """
    # Use the configured item class rather than a hard-coded one, like the
    # state and table classes.
    start_item = self.item_cls(self.grammar.starting_rule, 0, Token.eof())
    return self.closure({start_item})
0 commit comments