38
38
39
39
#include " clang-pseudo/grammar/Grammar.h"
40
40
#include " llvm/ADT/ArrayRef.h"
41
+ #include " llvm/ADT/BitVector.h"
42
+ #include " llvm/Support/Capacity.h"
41
43
#include < cstdint>
42
44
#include < vector>
43
45
@@ -62,6 +64,9 @@ class LRTable {
62
64
63
65
// Action represents the terminal and nonterminal actions; it combines the
64
66
// entry of the ACTION and GOTO tables from the LR literature.
67
+ //
68
+ // FIXME: as we move away from a homogeneous table structure shared between
69
+ // action types, this class becomes less useful. Remove it.
65
70
class Action {
66
71
public:
67
72
enum Kind : uint8_t {
@@ -73,8 +78,6 @@ class LRTable {
73
78
// A shift is a forward transition, and the value n is the next state that
74
79
// the parser is to enter.
75
80
Shift,
76
- // Reduce by a rule: pop the state stack.
77
- Reduce,
78
81
79
82
// NOTE: there are no typical accept actions in the LRtable, accept
80
83
// actions are handled specifically in the parser -- if the parser
@@ -91,7 +94,6 @@ class LRTable {
91
94
92
95
static Action goTo (StateID S) { return Action (GoTo, S); }
93
96
static Action shift (StateID S) { return Action (Shift, S); }
94
- static Action reduce (RuleID RID) { return Action (Reduce, RID); }
95
97
static Action sentinel () { return Action (Sentinel, 0 ); }
96
98
97
99
StateID getShiftState () const {
@@ -102,10 +104,6 @@ class LRTable {
102
104
assert (kind () == GoTo);
103
105
return Value;
104
106
}
105
- RuleID getReduceRule () const {
106
- assert (kind () == Reduce);
107
- return Value;
108
- }
109
107
Kind kind () const { return static_cast <Kind>(K); }
110
108
111
109
bool operator ==(const Action &L) const { return opaque () == L.opaque (); }
@@ -123,9 +121,6 @@ class LRTable {
123
121
uint16_t Value : ValueBits;
124
122
};
125
123
126
- // Returns all available actions for the given state on a terminal.
127
- // Expected to be called by LR parsers.
128
- llvm::ArrayRef<Action> getActions (StateID State, SymbolID Terminal) const ;
129
124
// Returns the state after we reduce a nonterminal.
130
125
// Expected to be called by LR parsers.
131
126
// REQUIRES: Nonterminal is valid here.
@@ -135,9 +130,26 @@ class LRTable {
135
130
// If the terminal is invalid here, returns None.
136
131
llvm::Optional<StateID> getShiftState (StateID State, SymbolID Terminal) const ;
137
132
138
- // Looks up available actions.
139
- // Returns empty if no available actions in the table.
140
- llvm::ArrayRef<Action> find (StateID State, SymbolID Symbol) const ;
133
+ // Returns the possible reductions from a state.
134
+ //
135
+ // These are not keyed by a lookahead token. Instead, call canFollow() to
136
+ // check whether a reduction should apply in the current context:
137
+ // for (RuleID R : LR.getReduceRules(S)) {
138
+ // if (!LR.canFollow(G.lookupRule(R).Target, NextToken))
139
+ // continue;
140
+ // // ...apply reduce...
141
+ // }
142
+ llvm::ArrayRef<RuleID> getReduceRules (StateID State) const {
143
+ return llvm::makeArrayRef (&Reduces[ReduceOffset[State]],
144
+ &Reduces[ReduceOffset[State + 1 ]]);
145
+ }
146
+ // Returns whether Terminal can follow Nonterminal in a valid source file.
147
+ bool canFollow (SymbolID Nonterminal, SymbolID Terminal) const {
148
+ assert (isToken (Terminal));
149
+ assert (isNonterminal (Nonterminal));
150
+ return FollowSets.test (tok::NUM_TOKENS * Nonterminal +
151
+ symbolToToken (Terminal));
152
+ }
141
153
142
154
// Returns the state from which the LR parser should start to parse the input
143
155
// tokens as the given StartSymbol.
@@ -151,9 +163,12 @@ class LRTable {
151
163
StateID getStartState (SymbolID StartSymbol) const ;
152
164
153
165
size_t bytes () const {
154
- return sizeof (*this ) + Actions.capacity () * sizeof (Action) +
155
- Symbols.capacity () * sizeof (SymbolID) +
156
- StateOffset.capacity () * sizeof (uint32_t );
166
+ return sizeof (*this ) + llvm::capacity_in_bytes (Actions) +
167
+ llvm::capacity_in_bytes (Symbols) +
168
+ llvm::capacity_in_bytes (StateOffset) +
169
+ llvm::capacity_in_bytes (Reduces) +
170
+ llvm::capacity_in_bytes (ReduceOffset) +
171
+ llvm::capacity_in_bytes (FollowSets);
157
172
}
158
173
159
174
std::string dumpStatistics () const ;
@@ -162,17 +177,25 @@ class LRTable {
162
177
// Build a SLR(1) parsing table.
163
178
static LRTable buildSLR (const Grammar &G);
164
179
165
- class Builder ;
180
+ struct Builder ;
166
181
// Represents an entry in the table, used for building the LRTable.
167
182
struct Entry {
168
183
StateID State;
169
184
SymbolID Symbol;
170
185
Action Act;
171
186
};
187
+ struct ReduceEntry {
188
+ StateID State;
189
+ RuleID Rule;
190
+ };
172
191
// Build a specified table for testing purposes.
173
- static LRTable buildForTests (const GrammarTable &, llvm::ArrayRef<Entry>);
192
+ static LRTable buildForTests (const Grammar &G, llvm::ArrayRef<Entry>,
193
+ llvm::ArrayRef<ReduceEntry>);
174
194
175
195
private:
196
+ // Looks up actions stored in the generic table.
197
+ llvm::ArrayRef<Action> find (StateID State, SymbolID Symbol) const ;
198
+
176
199
// Conceptually the LR table is a multimap from (State, SymbolID) => Action.
177
200
// Our physical representation is quite different for compactness.
178
201
@@ -188,6 +211,17 @@ class LRTable {
188
211
std::vector<Action> Actions;
189
212
// A sorted table, storing the start state for each target parsing symbol.
190
213
std::vector<std::pair<SymbolID, StateID>> StartStates;
214
+
215
+ // Given a state ID S, the half-open range of Reduces is
216
+ // [ReduceOffset[S], ReduceOffset[S+1])
217
+ std::vector<uint32_t > ReduceOffset;
218
+ std::vector<RuleID> Reduces;
219
+ // Conceptually this is a bool[SymbolID][Token], each entry describing whether
220
+ // the grammar allows the (nonterminal) symbol to be followed by the token.
221
+ //
222
+ // This is flattened by encoding the (SymbolID Nonterminal, tok::Kind Token)
223
+ // as an index: Nonterminal * NUM_TOKENS + Token.
224
+ llvm::BitVector FollowSets;
191
225
};
192
226
llvm::raw_ostream &operator <<(llvm::raw_ostream &, const LRTable::Action &);
193
227
0 commit comments