init optimizations

ea-rus · ea-rus · commit 07a5e3b6464c · 2025-04-30T09:53:20.000+03:00
diff --git a/sly/yacc.py b/sly/yacc.py
@@ -202,6 +202,7 @@ def __init__(self, number, name, prod, precedence=('right', 0), func=None, file=
         self.file     = file
         self.line     = line
         self.prec     = precedence
+        self.lr0_added = 0
         
         # Internal settings used during table construction
         self.len  = len(self.prod)   # Length of the production
@@ -319,7 +320,7 @@ def __init__(self, p, n):
         self.prod       = list(p.prod)
         self.number     = p.number
         self.lr_index   = n
-        self.lookaheads = {}
+        self.lookaheads = defaultdict(set)
         self.prod.insert(n, '.')
         self.prod       = tuple(self.prod)
         self.len        = len(self.prod)
@@ -915,9 +916,7 @@ def traverse(x, N, stack, F, X, R, FP):
         if N[y] == 0:
             traverse(y, N, stack, F, X, R, FP)
         N[x] = min(N[x], N[y])
-        for a in F.get(y, []):
-            if a not in F[x]:
-                F[x].append(a)
+        F[x].update(F[y])
     if N[x] == d:
         N[stack[-1]] = MAXINT
         F[stack[-1]] = F[x]
@@ -945,7 +944,10 @@ def __init__(self, grammar):
         self.lr_action     = {}        # Action table
         self.lr_goto       = {}        # Goto table
         self.lr_productions  = grammar.Productions    # Copy of grammar Production array
-        self.lr_goto_cache = {}        # Cache of computed gotos
+
+        # Cache of computed gotos
+        self.lr_goto_cache = defaultdict(dict)
+        self.lr_goto_cache2 = {}
         self.lr0_cidhash   = {}        # Cache of closures
         self._add_count    = 0         # Internal counter used to detect cycles
 
@@ -977,6 +979,11 @@ def __init__(self, grammar):
             rules = list(actions.values())
             if len(rules) == 1 and rules[0] < 0:
                 self.defaulted_states[state] = rules[0]
+        # clear cache
+        self.lr_goto_cache = None
+        self.lr_goto_cache2 = None
+        self.lr0_cidhash = None
+        self.grammar = None
 
     # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
     def lr0_closure(self, I):
@@ -989,7 +996,7 @@ def lr0_closure(self, I):
             didadd = False
             for j in J:
                 for x in j.lr_after:
-                    if getattr(x, 'lr0_added', 0) == self._add_count:
+                    if x.lr0_added == self._add_count:
                         continue
                     # Add B --> .G to J
                     J.append(x.lr_next)
@@ -1005,40 +1012,60 @@ def lr0_closure(self, I):
     # objects).  With uniqueness, we can later do fast set comparisons using
     # id(obj) instead of element-wise comparison.
 
-    def lr0_goto(self, I, x):
+    def lr0_goto(self, I, x, I_d=None):
         # First we look for a previously cached entry
-        g = self.lr_goto_cache.get((id(I), x))
+        g = self.lr_goto_cache[id(I)].get(x)
         if g:
             return g
 
         # Now we generate the goto set in a way that guarantees uniqueness
         # of the result
 
-        s = self.lr_goto_cache.get(x)
+        s = self.lr_goto_cache2.get(x)
         if not s:
             s = {}
-            self.lr_goto_cache[x] = s
+            self.lr_goto_cache2[x] = s
 
         gs = []
-        for p in I:
-            n = p.lr_next
-            if n and n.lr_before == x:
+
+        if I_d is None:
+            for p in I:
+                n = p.lr_next
+                if n and n.lr_before == x:
+                    s1 = s.get(id(n))
+                    if not s1:
+                        s1 = {}
+                        s[id(n)] = s1
+                    gs.append(n)
+                    s = s1
+        else:
+            for n in I_d[x]:
                 s1 = s.get(id(n))
                 if not s1:
                     s1 = {}
                     s[id(n)] = s1
                 gs.append(n)
                 s = s1
+
         g = s.get('$end')
         if not g:
             if gs:
                 g = self.lr0_closure(gs)
                 s['$end'] = g
             else:
                 s['$end'] = gs
-        self.lr_goto_cache[(id(I), x)] = g
+        self.lr_goto_cache[id(I)][x] = g
         return g
 
+    def get_i_map(self, I):
+        # creates structure for faster search
+        I_d = defaultdict(list)
+        for p in I:
+            n = p.lr_next
+            if n:
+                I_d[n.lr_before].append(n)
+        return I_d
+
     # Compute the LR(0) sets of item function
     def lr0_items(self):
         C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
@@ -1053,14 +1080,16 @@ def lr0_items(self):
             I = C[i]
             i += 1
 
+            I_d = self.get_i_map(I)
+
             # Collect all of the symbols that could possibly be in the goto(I,X) sets
             asyms = {}
             for ii in I:
                 for s in ii.usyms:
                     asyms[s] = None
 
             for x in asyms:
-                g = self.lr0_goto(I, x)
+                g = self.lr0_goto(I, x, I_d)
                 if not g or id(g) in self.lr0_cidhash:
                     continue
                 self.lr0_cidhash[id(g)] = len(C)
@@ -1146,21 +1175,19 @@ def find_nonterminal_transitions(self, C):
     # -----------------------------------------------------------------------------
 
     def dr_relation(self, C, trans, nullable):
-        dr_set = {}
         state, N = trans
-        terms = []
+        terms = set()
 
         g = self.lr0_goto(C[state], N)
         for p in g:
             if p.lr_index < p.len - 1:
                 a = p.prod[p.lr_index+1]
                 if a in self.grammar.Terminals:
-                    if a not in terms:
-                        terms.append(a)
+                    terms.add(a)
 
         # This extra bit is to handle the start state
         if state == 0 and N == self.grammar.Productions[0].prod[0]:
-            terms.append('$end')
+            terms.add('$end')
 
         return terms
 
@@ -1335,14 +1362,11 @@ def compute_follow_sets(self, ntrans, readsets, inclsets):
 
     def add_lookaheads(self, lookbacks, followset):
         for trans, lb in lookbacks.items():
+            f = followset[trans]
+
             # Loop over productions in lookback
             for state, p in lb:
-                if state not in p.lookaheads:
-                    p.lookaheads[state] = set()
-
-                f = followset.get(trans, [])
-                for a in f:
-                    p.lookaheads[state].add(a)
+                p.lookaheads[state].update(f)
 
     # -----------------------------------------------------------------------------
     # add_lalr_lookaheads()
@@ -1391,6 +1415,8 @@ def lr_parse_table(self):
 
         # Build the parser table, state by state
         for st, I in enumerate(C):
+            I_d = self.get_i_map(I)
+
             descrip = []
             # Loop over each production in I
             actlist = []              # List of actions
@@ -1468,7 +1494,7 @@ def lr_parse_table(self):
                         i = p.lr_index
                         a = p.prod[i+1]       # Get symbol right after the "."
                         if a in self.grammar.Terminals:
-                            g = self.lr0_goto(I, a)
+                            g = self.lr0_goto(I, a, I_d)
                             j = self.lr0_cidhash.get(id(g), -1)
                             if j >= 0:
                                 # We are in a shift state
@@ -1524,7 +1550,7 @@ def lr_parse_table(self):
                     if s in self.grammar.Nonterminals:
                         nkeys[s] = None
             for n in nkeys:
-                g = self.lr0_goto(I, n)
+                g = self.lr0_goto(I, n, I_d)
                 j = self.lr0_cidhash.get(id(g), -1)
                 if j >= 0:
                     st_goto[n] = j