Skip to content

Commit adf55f6

Browse files
committed
Add benchmark SQL tests, remove dead expansion code, document failed approaches
- Copy 18 complex SQL queries from benchmark/sqlite_parse/queries.sql to driver_sqlite_test.wado for better parser regression coverage (JOINs, recursive CTEs, correlated subqueries, CASE, set operations, etc.) - Remove dead code from parser_gen.wado: sll_expand_rule_ref, try_expand_opaque, strip_all_consume (not called, caused correctness bugs when active) - Keep zero-overhead return stack infrastructure (SllReturn, push_return, pop_return, return-stack-aware sll_config_first/sll_advance_inner) - Document the RuleRef expansion approach and its 3 failure modes in package-gale/CLAUDE.md to prevent repeating the same mistakes https://claude.ai/code/session_01ACVN5Rr7waUZWXtv8MFN2C
1 parent 7a1af85 commit adf55f6

File tree

3 files changed

+283
-195
lines changed

3 files changed

+283
-195
lines changed

package-gale/AGENTS.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,30 @@ Commit the updated golden files.
101101

102102
- **No backtracking in new code.** Use static k-token lookahead prediction to disambiguate alternatives. If prediction cannot resolve within depth 5, file an issue rather than adding backtracking. Existing backtracking sites are being migrated to prediction; do not add new ones.
103103

104+
## Failed Approaches (Do Not Repeat)
105+
106+
### RuleRef Expansion via Return Stack (2026-03)
107+
108+
**Goal:** Expand multi-token RuleRefs during SLL prediction to reduce backtracking.
109+
110+
**What was tried:** Added `return_stack` to `SllConfig` to track continuation points when entering a referenced rule. `sll_expand_rule_ref` pushed return frames and advanced inside sub-rules. `try_expand_opaque` called expansion when `build_sll_node` would otherwise produce `Backtrack`.
111+
112+
**Why it failed (3 distinct bugs):**
113+
114+
1. **Consume node corruption:** `build_sll_node` emits `Consume(element, child)` when all configs share a common terminal. For expanded configs inside a sub-rule, this emits `p.expect(K_FROM)` at the _decision point_, consuming a token that belongs to the referenced rule (e.g., `delete_stmt`). Fix attempted: `strip_all_consume` — but this loses disambiguation information.
115+
116+
2. **Depth-mixed Dispatch:** Expanded configs produce Dispatch branches for tokens _inside_ sub-rules (e.g., `K_RECURSIVE` from `with_clause`). When multiple alternatives share the same prefix rule (`with_clause`), these dispatches are meaningless — every alternative sees the same tokens. The generated parser enters wrong branches and fails or times out.
117+
118+
3. **Dedup false resolution:** `sll_dedup_by_alt` keeps one config per `alt_index`. When two alternatives expand to configs with identical FIRST sets (e.g., `join_clause` and `table_or_subquery` both start with `table_or_subquery`), dedup merges them into a single alt. The prediction then emits a `Leaf` for the wrong alternative, silently dropping the other.
119+
120+
**What remains:** The `return_stack` field on `SllConfig`, `push_return`, `pop_return`, and return-stack-aware `sll_config_first` / `sll_advance_inner` are committed as zero-overhead infrastructure. They don't affect generated output.
121+
122+
**Lessons:**
123+
124+
- Tokens from inside expanded sub-rules cannot be used for prediction at the decision point level
125+
- To use expansion correctly, the prediction must map expanded tokens back to the decision point's lookahead depth (essentially an ATN simulator)
126+
- `sll_dedup_by_alt` is too aggressive for expanded configs — alternatives sharing sub-rules get merged
127+
104128
## On-Task-Done
105129

106130
When completing a task, run from the project root:

package-gale/src/parser_gen.wado

Lines changed: 0 additions & 195 deletions
Original file line numberDiff line numberDiff line change
@@ -77,25 +77,6 @@ fn strip_dead_consume(node: PredictionNode) -> PredictionNode {
7777
return node;
7878
}
7979

80-
/// Strip ALL Consume nodes from a prediction tree.
81-
/// Used for expanded RuleRef predictions where Consume would incorrectly
82-
/// consume tokens belonging to the sub-rule at the decision point.
83-
fn strip_all_consume(node: PredictionNode) -> PredictionNode {
84-
if let Consume(c) = node {
85-
return strip_all_consume(c.child);
86-
}
87-
if let Dispatch(d) = node {
88-
let mut new_branches: Array<PredictionBranch> = [];
89-
for let b of d.branches {
90-
new_branches.append(PredictionBranch {
91-
tokens: b.tokens,
92-
child: strip_all_consume(b.child),
93-
});
94-
}
95-
return PredictionNode::Dispatch(PredictionDispatch { depth: d.depth, branches: new_branches });
96-
}
97-
return node;
98-
}
9980

10081
struct SllReturn {
10182
elements: Array<Element>,
@@ -184,52 +165,6 @@ fn sll_advance(c: &SllConfig, token: &String, all_rules: &Array<ParserRule>, lit
184165
return sll_advance_inner(c, token, all_rules, lit_tokens, 0);
185166
}
186167

187-
fn sll_expand_rule_ref(name: &String, c: &SllConfig, continuation_pos: i32, token: &String, all_rules: &Array<ParserRule>, lit_tokens: &Array<LitToken>, inline_depth: i32) -> Array<SllConfig> {
188-
// Depth guard: only 1 level of RuleRef expansion to prevent blowup.
189-
if c.return_stack.len() >= 1 {
190-
return [SllConfig { alt_index: c.alt_index, elements: c.elements, pos: -1, return_stack: [] }];
191-
}
192-
// Only expand rules with few alternatives to avoid combinatorial explosion.
193-
let mut rule_alt_count = 0;
194-
for let rule of all_rules {
195-
if rule.name == *name {
196-
rule_alt_count = rule.alternatives.len();
197-
break;
198-
}
199-
}
200-
if rule_alt_count > 5 {
201-
return [SllConfig { alt_index: c.alt_index, elements: c.elements, pos: -1, return_stack: [] }];
202-
}
203-
let new_stack = push_return(
204-
&c.return_stack,
205-
c.elements,
206-
continuation_pos,
207-
);
208-
let mut results: Array<SllConfig> = [];
209-
for let rule of all_rules {
210-
if rule.name != *name {
211-
continue;
212-
}
213-
for let alt of rule.alternatives {
214-
let expanded = SllConfig {
215-
alt_index: c.alt_index,
216-
elements: alt.elements,
217-
pos: 0,
218-
return_stack: new_stack,
219-
};
220-
let advanced = sll_advance_inner(&expanded, token, all_rules, lit_tokens, inline_depth + 1);
221-
for let a of advanced {
222-
results.append(a);
223-
}
224-
}
225-
break;
226-
}
227-
if results.is_empty() {
228-
return [];
229-
}
230-
return results;
231-
}
232-
233168
fn sll_advance_inner(c: &SllConfig, token: &String, all_rules: &Array<ParserRule>, lit_tokens: &Array<LitToken>, inline_depth: i32) -> Array<SllConfig> {
234169
// Limit inline expansion depth to prevent explosion from recursive rules.
235170
if inline_depth > 2 {
@@ -544,136 +479,6 @@ fn build_sll_node(configs: &Array<SllConfig>, depth: i32, max_depth: i32, all_ru
544479
return PredictionNode::Dispatch(PredictionDispatch { depth, branches });
545480
}
546481

547-
/// Try to resolve opaque configs by expanding their multi-token RuleRefs.
548-
/// Returns Some(node) if expansion produces a better prediction tree, None otherwise.
549-
/// `non_opaque_configs` are the already-advanced transparent configs for this token.
550-
fn try_expand_opaque(original_configs: &Array<SllConfig>, token: &String, non_opaque_configs: &Array<SllConfig>, opaque_alts: Array<i32>, depth: i32, max_depth: i32, all_rules: &Array<ParserRule>, lit_tokens: &Array<LitToken>) -> Option<PredictionNode> {
551-
// Only expand at shallow depths (up to 2 lookahead levels) to limit cost.
552-
if depth > 2 || depth >= max_depth {
553-
return null;
554-
}
555-
// Rule diversity check: only expand when opaque configs reference different rules.
556-
// If all opaque configs point to the same RuleRef, expansion can't distinguish them.
557-
let mut rule_refs: Array<String> = [];
558-
for let c of original_configs {
559-
if !array_contains_i32(&opaque_alts, c.alt_index) {
560-
continue;
561-
}
562-
if c.pos < 0 || c.pos >= c.elements.len() {
563-
continue;
564-
}
565-
if let RuleRef(name) = c.elements[c.pos] {
566-
if !array_contains_str(&rule_refs, &name) {
567-
rule_refs.append(name);
568-
}
569-
}
570-
}
571-
if rule_refs.len() <= 1 {
572-
return null;
573-
}
574-
// Re-advance opaque configs by expanding their RuleRefs.
575-
let mut expanded_configs: Array<SllConfig> = [];
576-
for let c of original_configs {
577-
if !array_contains_i32(&opaque_alts, c.alt_index) {
578-
continue;
579-
}
580-
if c.pos < 0 || c.pos >= c.elements.len() {
581-
continue;
582-
}
583-
let elem = &c.elements[c.pos];
584-
if let RuleRef(name) = *elem {
585-
let result = sll_expand_rule_ref(&name, c, c.pos + 1, token, all_rules, lit_tokens, 0);
586-
for let r of result {
587-
if r.pos != -1 {
588-
expanded_configs.append(r);
589-
}
590-
}
591-
}
592-
}
593-
if expanded_configs.is_empty() {
594-
return null;
595-
}
596-
// Combine non-opaque (already advanced) with newly expanded configs.
597-
let mut all_configs: Array<SllConfig> = [];
598-
for let c of non_opaque_configs {
599-
all_configs.append(sll_config_clone(c));
600-
}
601-
for let ec of expanded_configs {
602-
all_configs.append(ec);
603-
}
604-
// Build a flat one-level Dispatch from expanded configs.
605-
// Group configs by FIRST token, then check if each token group resolves to one alt.
606-
let mut all_tokens: Array<String> = [];
607-
let mut config_firsts: Array<Array<String>> = [];
608-
for let c of all_configs {
609-
let first = sll_config_first(&c, all_rules, lit_tokens);
610-
config_firsts.append(first);
611-
for let tk of first {
612-
if !array_contains_str(&all_tokens, &tk) {
613-
all_tokens.append(tk);
614-
}
615-
}
616-
}
617-
let mut branches: Array<PredictionBranch> = [];
618-
let mut has_improvement = false;
619-
for let mut t = 0; t < all_tokens.len(); t += 1 {
620-
let tk = &all_tokens[t];
621-
let mut token_alts: Array<i32> = [];
622-
for let mut i = 0; i < all_configs.len(); i += 1 {
623-
if array_contains_str(&config_firsts[i], tk) {
624-
if !array_contains_i32(&token_alts, all_configs[i].alt_index) {
625-
token_alts.append(all_configs[i].alt_index);
626-
}
627-
}
628-
}
629-
if token_alts.len() == 1 {
630-
has_improvement = true;
631-
let mut merged = false;
632-
for let mut b = 0; b < branches.len(); b += 1 {
633-
if let Leaf(idx) = branches[b].child {
634-
if idx == token_alts[0] {
635-
branches[b].tokens.append(*tk);
636-
merged = true;
637-
}
638-
}
639-
}
640-
if !merged {
641-
branches.append(PredictionBranch {
642-
tokens: [*tk],
643-
child: PredictionNode::Leaf(token_alts[0]),
644-
});
645-
}
646-
} else {
647-
// Still ambiguous for this token — fall back to Backtrack.
648-
let sorted = token_alts.sorted();
649-
let child = PredictionNode::Backtrack(sorted);
650-
let mut merged = false;
651-
for let mut b = 0; b < branches.len(); b += 1 {
652-
if prediction_node_eq(&branches[b].child, &child) {
653-
branches[b].tokens.append(*tk);
654-
merged = true;
655-
}
656-
}
657-
if !merged {
658-
branches.append(PredictionBranch {
659-
tokens: [*tk],
660-
child,
661-
});
662-
}
663-
}
664-
}
665-
if !has_improvement || branches.is_empty() {
666-
return null;
667-
}
668-
// Only use if ALL branches are Leaf — any Backtrack branch means the expansion
669-
// didn't fully resolve, which can cause incorrect dispatching on sub-rule tokens.
670-
for let b of branches {
671-
if let Backtrack(_) = b.child {
672-
return null;
673-
}
674-
}
675-
return Option::<PredictionNode>::Some(PredictionNode::Dispatch(PredictionDispatch { depth, branches }));
676-
}
677482

678483
/// Check if all configs are at the same terminal element (for Consume).
679484
fn sll_find_common_terminal(configs: &Array<SllConfig>) -> Option<Element> {

0 commit comments

Comments
 (0)