python3kgae
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
Lines changed: 52 additions & 18 deletions
diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
Lines changed: 13 additions & 12 deletions
diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
Lines changed: 20 additions & 17 deletions
@@ -38,6 +38,8 @@
 
 #include "clang-pseudo/grammar/Grammar.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/Capacity.h"
 #include <cstdint>
 #include <vector>
 
@@ -62,6 +64,9 @@ class LRTable {
 
   // Action represents the terminal and nonterminal actions, it combines the
   // entry of the ACTION and GOTO tables from the LR literature.
+  //
+  // FIXME: as we move away from a homogeneous table structure shared between
+  // action types, this class becomes less useful. Remove it.
   class Action {
   public:
     enum Kind : uint8_t {
@@ -73,8 +78,6 @@ class LRTable {
       // A shift is a forward transition, and the value n is the next state that
       // the parser is to enter.
       Shift,
-      // Reduce by a rule: pop the state stack.
-      Reduce,
 
       // NOTE: there are no typical accept actions in the LRtable, accept
       // actions are handled specifically in the parser -- if the parser
@@ -91,7 +94,6 @@ class LRTable {
 
     static Action goTo(StateID S) { return Action(GoTo, S); }
     static Action shift(StateID S) { return Action(Shift, S); }
-    static Action reduce(RuleID RID) { return Action(Reduce, RID); }
     static Action sentinel() { return Action(Sentinel, 0); }
 
     StateID getShiftState() const {
@@ -102,10 +104,6 @@ class LRTable {
       assert(kind() == GoTo);
       return Value;
     }
-    RuleID getReduceRule() const {
-      assert(kind() == Reduce);
-      return Value;
-    }
     Kind kind() const { return static_cast<Kind>(K); }
 
     bool operator==(const Action &L) const { return opaque() == L.opaque(); }
@@ -123,9 +121,6 @@ class LRTable {
     uint16_t Value : ValueBits;
   };
 
-  // Returns all available actions for the given state on a terminal.
-  // Expected to be called by LR parsers.
-  llvm::ArrayRef<Action> getActions(StateID State, SymbolID Terminal) const;
   // Returns the state after we reduce a nonterminal.
   // Expected to be called by LR parsers.
   // REQUIRES: Nonterminal is valid here.
@@ -135,9 +130,26 @@ class LRTable {
   // If the terminal is invalid here, returns None.
   llvm::Optional<StateID> getShiftState(StateID State, SymbolID Terminal) const;
 
-  // Looks up available actions.
-  // Returns empty if no available actions in the table.
-  llvm::ArrayRef<Action> find(StateID State, SymbolID Symbol) const;
+  // Returns the possible reductions from a state.
+  //
+  // These are not keyed by a lookahead token. Instead, call canFollow() to
+  // check whether a reduction should apply in the current context:
+  //   for (RuleID R : LR.getReduceRules(S))
+  //     if (LR.canFollow(G.lookupRule(R).Target, NextToken))
+  //       ...apply reduce...
+  llvm::ArrayRef<RuleID> getReduceRules(StateID State) const {
+    assert(State + 1u < ReduceOffset.size() && "invalid state!");
+    // Offsets may equal Reduces.size(), where operator[] is out of bounds.
+    return llvm::makeArrayRef(Reduces.data() + ReduceOffset[State],
+                              Reduces.data() + ReduceOffset[State + 1]);
+  }
+  // Tests whether the grammar allows Nonterminal to be followed by Terminal.
+  bool canFollow(SymbolID Nonterminal, SymbolID Terminal) const {
+    assert(isToken(Terminal));
+    assert(isNonterminal(Nonterminal));
+    unsigned RowStart = tok::NUM_TOKENS * Nonterminal; // first bit of the row
+    return FollowSets.test(RowStart + symbolToToken(Terminal));
+  }
 
   // Returns the state from which the LR parser should start to parse the input
   // tokens as the given StartSymbol.
@@ -151,9 +163,12 @@ class LRTable {
   StateID getStartState(SymbolID StartSymbol) const;
 
   size_t bytes() const {
-    return sizeof(*this) + Actions.capacity() * sizeof(Action) +
-           Symbols.capacity() * sizeof(SymbolID) +
-           StateOffset.capacity() * sizeof(uint32_t);
+    // This object's size plus the capacity in bytes of the listed containers.
+    using llvm::capacity_in_bytes;
+    return sizeof(*this) + capacity_in_bytes(Actions) +
+           capacity_in_bytes(Symbols) + capacity_in_bytes(StateOffset) +
+           capacity_in_bytes(Reduces) + capacity_in_bytes(ReduceOffset) +
+           capacity_in_bytes(FollowSets);
   }
 
   std::string dumpStatistics() const;
@@ -162,17 +177,25 @@ class LRTable {
   // Build a SLR(1) parsing table.
   static LRTable buildSLR(const Grammar &G);
 
-  class Builder;
+  struct Builder;
   // Represents an entry in the table, used for building the LRTable.
   struct Entry {
     StateID State;
     SymbolID Symbol;
     Action Act;
   };
+  struct ReduceEntry { // Reduce counterpart of Entry, taken by buildForTests.
+    StateID State;     // Presumably the state offering the reduction; confirm.
+    RuleID Rule;       // Presumably the rule to reduce by; confirm in Builder.
+  };
-  // Build a specifid table for testing purposes.
+  // Build a specified table for testing purposes.
-  static LRTable buildForTests(const GrammarTable &, llvm::ArrayRef<Entry>);
+  static LRTable buildForTests(const Grammar &G, llvm::ArrayRef<Entry>,
+                               llvm::ArrayRef<ReduceEntry>);
 
 private:
+  // Looks up actions stored in the generic table.
+  llvm::ArrayRef<Action> find(StateID State, SymbolID Symbol) const;
+
   // Conceptually the LR table is a multimap from (State, SymbolID) => Action.
   // Our physical representation is quite different for compactness.
 
@@ -188,6 +211,17 @@ class LRTable {
   std::vector<Action> Actions;
   // A sorted table, storing the start state for each target parsing symbol.
   std::vector<std::pair<SymbolID, StateID>> StartStates;
+
+  // Given a state ID S, the half-open range of Reduces is
+  // [ReduceOffset[S], ReduceOffset[S+1])
+  std::vector<uint32_t> ReduceOffset;
+  std::vector<RuleID> Reduces;
+  // Conceptually this is a bool[SymbolID][Token], each entry describing whether
+  // the grammar allows the (nonterminal) symbol to be followed by the token.
+  //
+  // This is flattened by encoding the (SymbolID Nonterminal, tok::Kind Token)
+  // as an index: Nonterminal * NUM_TOKENS + Token.
+  llvm::BitVector FollowSets;
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &);
 
 
@@ -251,9 +251,8 @@ class GLRReduce {
 private:
-  // pop walks up the parent chain(s) for a reduction from Head by to Rule.
+  // pop walks up the parent chain(s) for a reduction from Head by Rule.
   // Once we reach the end, record the bases and sequences.
-  void pop(const GSS::Node *Head, RuleID RID) {
+  void pop(const GSS::Node *Head, RuleID RID, const Rule &Rule) {
     LLVM_DEBUG(llvm::dbgs() << "  Pop " << Params.G.dumpRule(RID) << "\n");
-    const auto &Rule = Params.G.lookupRule(RID);
     Family F{/*Start=*/0, /*Symbol=*/Rule.Target, /*Rule=*/RID};
     TempSequence.resize_for_overwrite(Rule.Size);
     auto DFS = [&](const GSS::Node *N, unsigned I, auto &DFS) {
@@ -286,11 +285,11 @@ class GLRReduce {
       // In trivial cases, we perform the complete reduce here!
       if (popAndPushTrivial())
         continue;
-      for (const auto &A :
-           Params.Table.getActions((*Heads)[NextPopHead]->State, Lookahead)) {
-        if (A.kind() != LRTable::Action::Reduce)
-          continue;
-        pop((*Heads)[NextPopHead], A.getReduceRule());
+      for (RuleID RID :
+           Params.Table.getReduceRules((*Heads)[NextPopHead]->State)) {
+        const auto &Rule = Params.G.lookupRule(RID);
+        if (Params.Table.canFollow(Rule.Target, Lookahead))
+          pop((*Heads)[NextPopHead], RID, Rule);
       }
     }
   }
@@ -367,21 +366,23 @@ class GLRReduce {
   //  - the head must have only one reduction rule
   //  - the reduction path must be a straight line (no multiple parents)
   // (Roughly this means there's no local ambiguity, so the LR algorithm works).
+  //
+  // Returns true if we successfully consumed the next unpopped head.
   bool popAndPushTrivial() {
     if (!Sequences.empty() || Heads->size() != NextPopHead + 1)
       return false;
     const GSS::Node *Head = Heads->back();
     llvm::Optional<RuleID> RID;
-    for (auto &A : Params.Table.getActions(Head->State, Lookahead)) {
-      if (A.kind() != LRTable::Action::Reduce)
-        continue;
-      if (RID)
+    for (RuleID R : Params.Table.getReduceRules(Head->State)) {
+      if (RID.hasValue())
         return false;
-      RID = A.getReduceRule();
+      RID = R;
     }
     if (!RID)
       return true; // no reductions available, but we've processed the head!
     const auto &Rule = Params.G.lookupRule(*RID);
+    if (!Params.Table.canFollow(Rule.Target, Lookahead))
+      return true; // reduction is not available
     const GSS::Node *Base = Head;
     TempSequence.resize_for_overwrite(Rule.Size);
     for (unsigned I = 0; I < Rule.Size; ++I) {
 
@@ -10,6 +10,7 @@
 #include "clang-pseudo/grammar/Grammar.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
@@ -21,8 +22,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) {
   switch (A.kind()) {
   case LRTable::Action::Shift:
     return OS << llvm::formatv("shift state {0}", A.getShiftState());
-  case LRTable::Action::Reduce:
-    return OS << llvm::formatv("reduce by rule {0}", A.getReduceRule());
   case LRTable::Action::GoTo:
     return OS << llvm::formatv("go to state {0}", A.getGoToState());
   case LRTable::Action::Sentinel:
@@ -36,9 +35,11 @@ std::string LRTable::dumpStatistics() const {
 Statistics of the LR parsing table:
     number of states: {0}
     number of actions: {1}
-    size of the table (bytes): {2}
+    number of reduces: {2}
+    size of the table (bytes): {3}
 )",
-                       StateOffset.size() - 1, Actions.size(), bytes())
+                       StateOffset.size() - 1, Actions.size(), Reduces.size(),
+                       bytes())
       .str();
 }
 
@@ -52,19 +53,27 @@ std::string LRTable::dumpForTests(const Grammar &G) const {
       SymbolID TokID = tokenSymbol(static_cast<tok::TokenKind>(Terminal));
       for (auto A : find(S, TokID)) {
         if (A.kind() == LRTable::Action::Shift)
-          OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n",
+          OS.indent(4) << llvm::formatv("{0}: shift state {1}\n",
                                         G.symbolName(TokID), A.getShiftState());
-        else if (A.kind() == LRTable::Action::Reduce)
-          OS.indent(4) << llvm::formatv("'{0}': reduce by rule {1} '{2}'\n",
-                                        G.symbolName(TokID), A.getReduceRule(),
-                                        G.dumpRule(A.getReduceRule()));
       }
     }
+    for (RuleID R : getReduceRules(S)) {
+      SymbolID Target = G.lookupRule(R).Target;
+      std::vector<llvm::StringRef> Terminals;
+      for (unsigned Terminal = 0; Terminal < NumTerminals; ++Terminal) {
+        SymbolID TokID = tokenSymbol(static_cast<tok::TokenKind>(Terminal));
+        if (canFollow(Target, TokID))
+          Terminals.push_back(G.symbolName(TokID));
+      }
+      OS.indent(4) << llvm::formatv("{0}: reduce by rule {1} '{2}'\n",
+                                    llvm::join(Terminals, " "), R,
+                                    G.dumpRule(R));
+    }
     for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size();
          ++NontermID) {
       if (find(S, NontermID).empty())
         continue;
-      OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n",
+      OS.indent(4) << llvm::formatv("{0}: go to state {1}\n",
                                     G.symbolName(NontermID),
                                     getGoToState(S, NontermID));
     }
@@ -77,18 +86,12 @@ LRTable::getShiftState(StateID State, SymbolID Terminal) const {
   // FIXME: we spend a significant amount of time on misses here.
   // We could consider storing a std::bitset for a cheaper test?
   assert(pseudo::isToken(Terminal) && "expected terminal symbol!");
-  for (const auto &Result : getActions(State, Terminal))
+  for (const auto &Result : find(State, Terminal))
     if (Result.kind() == Action::Shift)
       return Result.getShiftState(); // unique: no shift/shift conflicts.
   return llvm::None;
 }
 
-llvm::ArrayRef<LRTable::Action> LRTable::getActions(StateID State,
-                                                    SymbolID Terminal) const {
-  assert(pseudo::isToken(Terminal) && "expect terminal symbol!");
-  return find(State, Terminal);
-}
-
 LRTable::StateID LRTable::getGoToState(StateID State,
                                        SymbolID Nonterminal) const {
   assert(pseudo::isNonterminal(Nonterminal) && "expected nonterminal symbol!");