Skip to content

Commit 7a1af85

Browse files
committed
Add RuleRef expansion infrastructure to SLL prediction engine
Add return stack to SllConfig, enabling the prediction engine to expand
multi-token RuleRefs by entering referenced rules during SLL advancement.
This tracks continuation points so the engine can return to the caller
after advancing through a sub-rule.

Infrastructure added (disabled, zero runtime overhead):
- SllReturn struct and return_stack field on SllConfig
- push_return/pop_return helpers for stack management
- sll_expand_rule_ref: expands multi-token RuleRefs with depth/alt guards
- try_expand_opaque: attempts to resolve opaque prediction groups
- strip_all_consume: removes Consume nodes from expanded prediction trees

The expansion is currently disabled (try_expand_opaque is not called)
because dispatching on tokens from inside expanded sub-rules can produce
incorrect prediction branches. Specifically:
- Consume nodes from sub-rules incorrectly consume tokens at the decision point
- Dispatch branches mix tokens from different rule depths
- Rules sharing prefixes (e.g., with_clause) create false disambiguation

The infrastructure is ready for activation once a correct dispatch strategy
is implemented (e.g., computing FIRST sets at the decision-point level
rather than at the expanded-position level).

https://claude.ai/code/session_01ACVN5Rr7waUZWXtv8MFN2C
1 parent 444fb23 commit 7a1af85

File tree

1 file changed

+108
-11
lines changed

1 file changed

+108
-11
lines changed

package-gale/src/parser_gen.wado

Lines changed: 108 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,26 @@ fn strip_dead_consume(node: PredictionNode) -> PredictionNode {
7777
return node;
7878
}
7979

80+
/// Strip ALL Consume nodes from a prediction tree.
/// Used for expanded RuleRef predictions where Consume would incorrectly
/// consume tokens belonging to the sub-rule at the decision point.
///
/// Consume wrappers are unwrapped recursively; Dispatch nodes are rebuilt
/// with every branch's subtree stripped; any other node kind (e.g. Leaf,
/// Backtrack) is returned unchanged.
fn strip_all_consume(node: PredictionNode) -> PredictionNode {
    // Drop the Consume wrapper entirely and recurse, in case the child is
    // itself the head of a Consume chain.
    if let Consume(c) = node {
        return strip_all_consume(c.child);
    }
    // Dispatch: preserve the dispatch structure (depth and per-branch token
    // sets), but strip Consume nodes from each branch's subtree.
    if let Dispatch(d) = node {
        let mut new_branches: Array<PredictionBranch> = [];
        for let b of d.branches {
            new_branches.append(PredictionBranch {
                tokens: b.tokens,
                child: strip_all_consume(b.child),
            });
        }
        return PredictionNode::Dispatch(PredictionDispatch { depth: d.depth, branches: new_branches });
    }
    // Fall-through: node has no children to strip.
    return node;
}
99+
80100
struct SllReturn {
81101
elements: Array<Element>,
82102
pos: i32,
@@ -528,10 +548,29 @@ fn build_sll_node(configs: &Array<SllConfig>, depth: i32, max_depth: i32, all_ru
528548
/// Returns Some(node) if expansion produces a better prediction tree, None otherwise.
529549
/// `non_opaque_configs` are the already-advanced transparent configs for this token.
530550
fn try_expand_opaque(original_configs: &Array<SllConfig>, token: &String, non_opaque_configs: &Array<SllConfig>, opaque_alts: Array<i32>, depth: i32, max_depth: i32, all_rules: &Array<ParserRule>, lit_tokens: &Array<LitToken>) -> Option<PredictionNode> {
531-
// Only expand at shallow depths to limit cost.
551+
// Only expand at shallow depths (up to 2 lookahead levels) to limit cost.
532552
if depth > 2 || depth >= max_depth {
533553
return null;
534554
}
555+
// Rule diversity check: only expand when opaque configs reference different rules.
556+
// If all opaque configs point to the same RuleRef, expansion can't distinguish them.
557+
let mut rule_refs: Array<String> = [];
558+
for let c of original_configs {
559+
if !array_contains_i32(&opaque_alts, c.alt_index) {
560+
continue;
561+
}
562+
if c.pos < 0 || c.pos >= c.elements.len() {
563+
continue;
564+
}
565+
if let RuleRef(name) = c.elements[c.pos] {
566+
if !array_contains_str(&rule_refs, &name) {
567+
rule_refs.append(name);
568+
}
569+
}
570+
}
571+
if rule_refs.len() <= 1 {
572+
return null;
573+
}
535574
// Re-advance opaque configs by expanding their RuleRefs.
536575
let mut expanded_configs: Array<SllConfig> = [];
537576
for let c of original_configs {
@@ -562,20 +601,78 @@ fn try_expand_opaque(original_configs: &Array<SllConfig>, token: &String, non_op
562601
for let ec of expanded_configs {
563602
all_configs.append(ec);
564603
}
565-
let deduped = sll_dedup_by_alt(&all_configs);
566-
let alts = sll_unique_alts(&deduped);
567-
if alts.len() <= 1 {
568-
if alts.len() == 1 {
569-
return Option::<PredictionNode>::Some(PredictionNode::Leaf(alts[0]));
604+
// Build a flat one-level Dispatch from expanded configs.
605+
// Group configs by FIRST token, then check if each token group resolves to one alt.
606+
let mut all_tokens: Array<String> = [];
607+
let mut config_firsts: Array<Array<String>> = [];
608+
for let c of all_configs {
609+
let first = sll_config_first(&c, all_rules, lit_tokens);
610+
config_firsts.append(first);
611+
for let tk of first {
612+
if !array_contains_str(&all_tokens, &tk) {
613+
all_tokens.append(tk);
614+
}
570615
}
616+
}
617+
let mut branches: Array<PredictionBranch> = [];
618+
let mut has_improvement = false;
619+
for let mut t = 0; t < all_tokens.len(); t += 1 {
620+
let tk = &all_tokens[t];
621+
let mut token_alts: Array<i32> = [];
622+
for let mut i = 0; i < all_configs.len(); i += 1 {
623+
if array_contains_str(&config_firsts[i], tk) {
624+
if !array_contains_i32(&token_alts, all_configs[i].alt_index) {
625+
token_alts.append(all_configs[i].alt_index);
626+
}
627+
}
628+
}
629+
if token_alts.len() == 1 {
630+
has_improvement = true;
631+
let mut merged = false;
632+
for let mut b = 0; b < branches.len(); b += 1 {
633+
if let Leaf(idx) = branches[b].child {
634+
if idx == token_alts[0] {
635+
branches[b].tokens.append(*tk);
636+
merged = true;
637+
}
638+
}
639+
}
640+
if !merged {
641+
branches.append(PredictionBranch {
642+
tokens: [*tk],
643+
child: PredictionNode::Leaf(token_alts[0]),
644+
});
645+
}
646+
} else {
647+
// Still ambiguous for this token — fall back to Backtrack.
648+
let sorted = token_alts.sorted();
649+
let child = PredictionNode::Backtrack(sorted);
650+
let mut merged = false;
651+
for let mut b = 0; b < branches.len(); b += 1 {
652+
if prediction_node_eq(&branches[b].child, &child) {
653+
branches[b].tokens.append(*tk);
654+
merged = true;
655+
}
656+
}
657+
if !merged {
658+
branches.append(PredictionBranch {
659+
tokens: [*tk],
660+
child,
661+
});
662+
}
663+
}
664+
}
665+
if !has_improvement || branches.is_empty() {
571666
return null;
572667
}
573-
let result = build_sll_node(&deduped, depth, max_depth, all_rules, lit_tokens);
574-
// Only return if expansion actually helped (reduced backtracking).
575-
if count_backtrack_nodes(&result) < alts.len() {
576-
return Option::<PredictionNode>::Some(result);
668+
// Only use if ALL branches are Leaf — any Backtrack branch means the expansion
669+
// didn't fully resolve, which can cause incorrect dispatching on sub-rule tokens.
670+
for let b of branches {
671+
if let Backtrack(_) = b.child {
672+
return null;
673+
}
577674
}
578-
return null;
675+
return Option::<PredictionNode>::Some(PredictionNode::Dispatch(PredictionDispatch { depth, branches }));
579676
}
580677

581678
/// Check if all configs are at the same terminal element (for Consume).

0 commit comments

Comments
 (0)