+from abc import ABC, abstractmethod
+from typing import Optional, Type
+
 from syntactes import Grammar, Token
 from syntactes._action import Action
-from syntactes._item import LR0Item
-from syntactes._state import LR0State
-from syntactes.parsing_table import Entry, LR0ParsingTable, SLRParsingTable
-
-
-class LR0Generator:
-    """
-    Generator of LR0 parsing tables.
-    """
+from syntactes._item import Item, LR0Item, LR1Item
+from syntactes._state import LR0State, LR1State, State
+from syntactes.parsing_table import (
+    Entry,
+    LR0ParsingTable,
+    LR1ParsingTable,
+    ParsingTable,
+    SLRParsingTable,
+)
+
+
+class Generator(ABC):
+    table_cls: Type[ParsingTable]
+    state_cls: Type[State]
+    item_cls: Type[Item]
 
     def __init__(self, grammar: Grammar) -> None:
         self.grammar = grammar
 
-    def generate(self) -> LR0ParsingTable:
+    @abstractmethod
+    def closure(self, items: set[Item]) -> set[Item]:
+        raise NotImplementedError()
+
+    @abstractmethod
+    def goto(self, items: set[Item], token: Token) -> set[Item]:
+        raise NotImplementedError()
+
+    def generate(self) -> ParsingTable:
         """
-        Generates an LR0 parsing table for the configured grammar.
+        Generates a parsing table for the configured grammar.
         """
         states, shift_entries = self._create_states_and_shift_entries()
         reduce_entries = self._create_reduce_entries(states)
 
         entries = shift_entries | reduce_entries
 
-        table = LR0ParsingTable.from_entries(entries, self.grammar)
+        table = self.table_cls.from_entries(entries, self.grammar)
 
         return table
 
-    def closure(self, items: set[LR0Item]) -> set[LR0Item]:
-        """
-        Computes and returns the closure for the given set of items.
-
-        The closure operation adds more items to a set of items when there
-        is a dot to the left of a non-terminal symbol.
-
-        e.g.
-        for any item S -> . E in the given items, closure adds E -> . T
-        and T -> . x, where E -> T and T -> x are production rules.
-        """
-        _set = {item for item in items}
-
-        for item in items:
-            if item.dot_is_last():
-                continue
-
-            new_items = self._get_related_items(item.after_dot)
-            _set |= new_items
-
-        return _set
-
-    def goto(self, items: set[LR0Item], token: Token) -> set[LR0Item]:
-        """
-        Computes and returns the GOTO set for the given set of items.
-
-        The goto operation creates a set where all items have the dot past the
-        given symbol.
-        """
-        _set: set[LR0Item] = set()
-
-        for item in items:
-            if item.dot_is_last() or item.after_dot != token:
-                continue
-
-            next_item = LR0Item(item.rule, item.position + 1)
-            _set.add(next_item)
-
-        return self.closure(_set)
-
-    def get_states(self) -> set[LR0State]:
+    def get_states(self) -> set[State]:
         """
         Returns the set of automaton states for the configured grammar.
         """
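
Note on the hunk above: the shared table-building flow moves into the abstract `Generator` base class, so each concrete generator only supplies `table_cls`/`state_cls`/`item_cls` plus its own `closure`, `goto` and entry builders. A minimal usage sketch, assuming the generator classes from this module are importable and that `grammar` is an already-built `Grammar` (grammar construction is outside this diff):

```
# Hypothetical driver; `grammar` is assumed to be a syntactes Grammar built elsewhere.
def build_all_tables(grammar):
    # Each subclass reuses Generator.generate(): collect shift and reduce entries,
    # then call table_cls.from_entries(entries, grammar).
    return {
        "lr0": LR0Generator(grammar).generate(),
        "slr": SLRGenerator(grammar).generate(),
        "lr1": LR1Generator(grammar).generate(),
    }
```
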
@@ -148,6 +125,68 @@ def _follow(self, symbol: Token) -> set[Token]:
 
         return _set
 
+    @abstractmethod
+    def _create_states_and_shift_entries(self) -> tuple[set[State], set[Entry]]:
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _create_reduce_entries(self, states: set[State]) -> set[Entry]:
+        raise NotImplementedError()
+
+
+class LR0Generator(Generator):
+    """
+    Generator of LR0 parsing tables.
+    """
+
+    table_cls = LR0ParsingTable
+    state_cls = LR0State
+    item_cls = LR0Item
+
+    def closure(self, items: set[LR0Item]) -> set[LR0Item]:
+        """
+        Computes and returns the closure for the given set of items.
+
+        The closure operation adds more items to a set of items when there
+        is a dot to the left of a non-terminal symbol.
+
+        e.g.
+        for any item S -> . E in the given items, closure adds E -> . T
+        and T -> . x, where E -> T and T -> x are production rules.
+        """
+        _set = {item for item in items}
+        __set = set()
+
+        while __set != _set:
+            __set = {i for i in _set}
+
+            for item in __set:
+                if item.dot_is_last():
+                    continue
+
+                new_items = self._get_related_items(item.after_dot)
+                _set |= new_items
+
+        return _set
+
+    def goto(self, items: set[LR0Item], token: Token) -> set[LR0Item]:
+        """
+        Computes and returns the GOTO set for the given set of items.
+
+        The goto operation creates a set where all items have the dot past the
+        given symbol.
+        """
+        _set: set[LR0Item] = set()
+
+        for item in items:
+            if item.dot_is_last() or item.after_dot != token:
+                continue
+
+            next_item = LR0Item(item.rule, item.position + 1)
+            _set.add(next_item)
+
+        return self.closure(_set)
+
     def _get_related_items(self, symbol: Token) -> set[LR0Item]:
         """
         e.g. the items X -> .g, Y -> .p would be returned for the below grammar rules:
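
Note on the hunk above: the LR0 `closure`/`goto` pair is now a fixpoint computation. A standalone sketch of the two operations on a toy grammar, using plain `(lhs, rhs, dot)` tuples instead of the syntactes item/state classes, so it runs on its own and is illustrative only:

```
# Toy grammar: S' -> S,  S -> ( S ),  S -> x
GRAMMAR = {
    "S'": [("S",)],
    "S": [("(", "S", ")"), ("x",)],
}
NONTERMINALS = set(GRAMMAR)

def closure(items):
    result = set(items)
    changed = True
    while changed:  # same fixpoint shape as LR0Generator.closure
        changed = False
        for lhs, rhs, dot in tuple(result):
            if dot < len(rhs) and rhs[dot] in NONTERMINALS:
                for prod in GRAMMAR[rhs[dot]]:
                    if (rhs[dot], prod, 0) not in result:
                        result.add((rhs[dot], prod, 0))
                        changed = True
    return result

def goto(items, symbol):
    # advance the dot over `symbol`, then close the result
    moved = {
        (lhs, rhs, dot + 1)
        for lhs, rhs, dot in items
        if dot < len(rhs) and rhs[dot] == symbol
    }
    return closure(moved)

start = closure({("S'", ("S",), 0)})
# start also contains S -> .( S ) and S -> .x
after_paren = goto(start, "(")
# after_paren contains S -> ( . S ) plus, via closure, S -> .( S ) and S -> .x
```
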
@@ -249,20 +288,155 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
 
 
 class SLRGenerator(LR0Generator):
-    def generate(self) -> SLRParsingTable:
+    table_cls = SLRParsingTable
+
+    def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
         """
-        Generates an SLR parsing table for the configured grammar.
+        Computes and returns the entries for reduce actions and the accept action.
         """
-        states, shift_entries = self._create_states_and_shift_entries()
-        reduce_entries = self._create_reduce_entries(states)
+        entries: set[Entry] = set()
 
-        entries = shift_entries | reduce_entries
+        for state in states:
+            for item in state.items:
+                if item.after_dot == Token.eof():
+                    entries.add(Entry(state, Token.eof(), Action.accept()))
 
-        table = SLRParsingTable.from_entries(entries, self.grammar)
+                if not item.dot_is_last():
+                    continue
 
-        return table
+                for token in self._follow(item.rule.lhs):
+                    entries.add(Entry(state, token, Action.reduce(item.rule)))
 
-    def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
+        return entries
+
+
+class LR1Generator(Generator):
+    table_cls = LR1ParsingTable
+    state_cls = LR1State
+    item_cls = LR1Item
+
+    def closure(self, items: set[LR1Item]) -> set[LR1Item]:
+        """
+        Computes and returns the closure for the given set of items,
+        carrying a lookahead token for each new item.
+        """
+        _set = {item for item in items}
+        __set = set()
+
+        while __set != _set:
+            __set = {i for i in _set}
+
+            for item in __set:
+                if item.dot_is_last():
+                    continue
+
+                if item.position + 1 < item.rule.rhs_len:
+                    next_symbol = item.rule.rhs[item.position + 1]
+                else:
+                    next_symbol = None
+
+                new_items = self._get_related_items(
+                    item.after_dot, next_symbol, item.lookahead_token
+                )
+                _set |= new_items
+
+        return _set
+
+    def goto(self, items: set[LR1Item], token: Token) -> set[LR1Item]:
+        """
+        Computes and returns the GOTO set for the given set of items.
+        """
+        _set: set[LR1Item] = set()
+
+        for item in items:
+            if item.dot_is_last() or item.after_dot != token:
+                continue
+
+            next_item = LR1Item(item.rule, item.position + 1, item.lookahead_token)
+            _set.add(next_item)
+
+        return self.closure(_set)
+
+    def _get_related_items(
+        self, symbol: Token, next_symbol: Optional[Token], lookahead_token: Token
+    ) -> set[LR1Item]:
+        """
+        Returns the items B -> .g with lookahead s for every rule B -> g whose
+        lhs is the given symbol, where s ranges over the FIRST set of
+        (next_symbol, lookahead_token).
+        """
+        _set: set[LR1Item] = set()
+
+        if next_symbol is None:
+            lookaheads = (lookahead_token,)
+        else:
+            lookaheads = (next_symbol, lookahead_token)
+
+        for rule in self.grammar.rules:
+            if rule.lhs != symbol:
+                continue
+
+            for s in self._first(*lookaheads):
+                _set.add(LR1Item(rule, 0, s))
+
+                if rule.rhs_len == 1 and not rule.rhs[0].is_terminal:
+                    _set |= self._get_related_items(rule.rhs[0], None, s)
+
+        return _set
+
+    def _create_states_and_shift_entries(self) -> tuple[set[LR1State], set[Entry]]:
+        """
+        Computes and returns the states and entries for shift actions.
+        """
+        states, entries = dict(), set()
+
+        initial_items = self.closure(
+            {LR1Item(self.grammar.starting_rule, 0, Token.eof())}
+        )
+        initial_state = LR1State.from_items(initial_items)
+        initial_state.set_number(1)
+        states[initial_state] = 1
+
+        _states, _entries = dict(), set()
+        while (_states, _entries) != (states, entries):
+            _states = {s: n for s, n in states.items()}
+            _entries = {e for e in entries}
+            states, entries = self._extend_states_and_shift_entries(_states, _entries)
+
+        return set(states.keys()), entries
+
+    def _extend_states_and_shift_entries(
+        self, states: dict[LR1State, int], entries: set[Entry]
+    ) -> tuple[dict[LR1State, int], set[Entry]]:
+        """
+        Extends states and entries following the below algorithm:
+
+        ```
+        for each state S in states
+            for each item A -> a.Xb in S
+                J = goto(S, X)
+                states.add(J)
+                entries.add((S->J, X))
+        ```
+        """
+        _states = {s: n for s, n in states.items()}
+        _entries = {e for e in entries}
+
+        EOF = Token.eof()
+        for state in states:
+            for item in state.items:
+                if item.dot_is_last():
+                    continue
+
+                if item.after_dot == EOF:
+                    state.set_final()
+                    continue
+
+                new_items = self.goto(state.items, item.after_dot)
+
+                if len(new_items) == 0:
+                    continue
+
+                new = LR1State.from_items(new_items)
+
+                number = _states.setdefault(new, len(_states) + 1)
+                new.set_number(number)
+
+                _entries.add(Entry(state, item.after_dot, Action.shift(new)))
+
+        return _states, _entries
+
+    def _create_reduce_entries(self, states: set[LR1State]) -> set[Entry]:
         """
         Computes and returns the entries for reduce actions and the accept action.
         """
@@ -276,7 +450,8 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
                 if not item.dot_is_last():
                     continue
 
-                for token in self._follow(item.rule.lhs):
-                    entries.add(Entry(state, token, Action.reduce(item.rule)))
+                entries.add(
+                    Entry(state, item.lookahead_token, Action.reduce(item.rule))
+                )
 
         return entries
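
Note on the hunk above: this is where SLR and canonical LR(1) part ways; `SLRGenerator` reduces on every token in FOLLOW(lhs), while `LR1Generator` reduces only on the item's own lookahead token. The state-collection loop described in the `_extend_states_and_shift_entries` docstring can be sketched standalone as below, reusing the `GRAMMAR`, `closure` and `goto` helpers from the tuple-based sketch after the second hunk (illustrative only, not the syntactes implementation):

```
def collect_states(start_items):
    # Repeatedly apply goto over every grammar symbol until no new state appears,
    # mirroring the "for each state S, for each item A -> a.Xb" pseudocode.
    symbols = {"(", ")", "x", "S"}
    states = {frozenset(closure(start_items))}
    edges = set()  # (source_state, symbol, target_state) shift transitions
    changed = True
    while changed:
        changed = False
        for state in tuple(states):
            for symbol in symbols:
                target = frozenset(goto(state, symbol))
                if not target:
                    continue
                if target not in states:
                    states.add(target)
                    changed = True
                edges.add((state, symbol, target))
    return states, edges

states, edges = collect_states({("S'", ("S",), 0)})
# For the toy grammar this settles on 6 states and 7 shift transitions.
```
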