Skip to content

Commit fc9d864

Browse files
committed
Reduce backtracking with ATN-style RuleRef expansion
Implement try_expand_opaque: when the SLL prediction engine encounters opaque multi-token RuleRefs that would produce a Backtrack node, expand them by entering the referenced rules and computing FIRST sets at the decision point's lookahead level. Key design: build a flat Dispatch manually from expanded FIRST sets, never passing expanded configs to build_sll_node. This avoids the 3 bugs from the previous approach (Consume corruption, depth-mixed Dispatch, dedup false resolution). Safety guards: - Rule diversity check: skip if all opaque alts reference the same rule - Alt count limit (<=8): prevent combinatorial explosion - Nullable-start guard: skip rules starting with nullable elements (e.g., with_clause?) to prevent depth mismatch in sll_advance - FIRST pre-filter: skip rule alternatives that can't match the token - Coverage verification: reject if any original alt is lost Results for SQLite grammar: 298 → 275 backtracking sites (-8%). Primarily resolves CREATE (5→0) and DROP (4→0) groups where alternatives start with different terminal sequences. https://claude.ai/code/session_01ACVN5Rr7waUZWXtv8MFN2C
1 parent adf55f6 commit fc9d864

File tree

3 files changed

+214
-63
lines changed

3 files changed

+214
-63
lines changed

package-gale/src/parser_gen.wado

Lines changed: 171 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,12 @@ fn build_sll_node(configs: &Array<SllConfig>, depth: i32, max_depth: i32, all_ru
453453
// Still ambiguous: recurse with deeper lookahead.
454454
// If opaque alts exist, try expanding them before giving up.
455455
let child = if opaque_alts.len() > 0 {
456-
PredictionNode::Backtrack(next_alts);
456+
let expanded = try_expand_opaque(&closed, tk, &next_configs, opaque_alts, depth + 1, max_depth, all_rules, lit_tokens);
457+
if let Some(node) = expanded {
458+
node;
459+
} else {
460+
PredictionNode::Backtrack(next_alts);
461+
}
457462
} else {
458463
build_sll_node(&deduped, depth + 1, max_depth, all_rules, lit_tokens);
459464
};
@@ -479,6 +484,171 @@ fn build_sll_node(configs: &Array<SllConfig>, depth: i32, max_depth: i32, all_ru
479484
return PredictionNode::Dispatch(PredictionDispatch { depth, branches });
480485
}
481486

487+
/// Try to resolve opaque configs by expanding their multi-token RuleRefs.
488+
/// Uses ATN-style expansion: enter referenced rules, advance by one token,
489+
/// then compute FIRST sets at the decision point's lookahead level.
490+
/// Builds a flat Dispatch — never calls build_sll_node on expanded configs.
491+
fn try_expand_opaque(original_configs: &Array<SllConfig>, token: &String, non_opaque_configs: &Array<SllConfig>, opaque_alts: Array<i32>, depth: i32, max_depth: i32, all_rules: &Array<ParserRule>, lit_tokens: &Array<LitToken>) -> Option<PredictionNode> {
492+
if depth >= max_depth {
493+
return null;
494+
}
495+
// Rule diversity check: only expand when opaque configs reference different rules.
496+
let mut rule_refs: Array<String> = [];
497+
for let c of original_configs {
498+
if !array_contains_i32(&opaque_alts, c.alt_index) {
499+
continue;
500+
}
501+
if c.pos < 0 || c.pos >= c.elements.len() {
502+
continue;
503+
}
504+
if let RuleRef(name) = c.elements[c.pos] {
505+
if !array_contains_str(&rule_refs, &name) {
506+
rule_refs.append(name);
507+
}
508+
}
509+
}
510+
if rule_refs.len() <= 1 {
511+
return null;
512+
}
513+
// Expand opaque configs by entering their RuleRefs and advancing by token.
514+
let mut expanded_configs: Array<SllConfig> = [];
515+
for let c of original_configs {
516+
if !array_contains_i32(&opaque_alts, c.alt_index) {
517+
continue;
518+
}
519+
if c.pos < 0 || c.pos >= c.elements.len() {
520+
continue;
521+
}
522+
if let RuleRef(name) = c.elements[c.pos] {
523+
let mut alt_count = 0;
524+
let mut has_nullable_start = false;
525+
for let rule of all_rules {
526+
if rule.name == name {
527+
alt_count = rule.alternatives.len();
528+
// Check if any alternative starts with a nullable element.
529+
// Nullable-start rules cause depth mismatch: sll_advance skips
530+
// the nullable, producing FIRST at a different input depth.
531+
for let alt of rule.alternatives {
532+
if !alt.elements.is_empty() && is_nullable(&alt.elements[0]) {
533+
has_nullable_start = true;
534+
}
535+
}
536+
break;
537+
}
538+
}
539+
if alt_count > 8 || has_nullable_start {
540+
continue;
541+
}
542+
let new_stack = push_return(&c.return_stack, c.elements, c.pos + 1);
543+
for let rule of all_rules {
544+
if rule.name != name {
545+
continue;
546+
}
547+
for let alt of rule.alternatives {
548+
// Pre-filter: skip alternatives whose FIRST doesn't contain the token.
549+
let mut v: Array<String> = [];
550+
if !array_contains_str(&first_of_alt(&alt, all_rules, lit_tokens, &mut v), token) {
551+
continue;
552+
}
553+
let exp = SllConfig {
554+
alt_index: c.alt_index,
555+
elements: alt.elements,
556+
pos: 0,
557+
return_stack: new_stack,
558+
};
559+
let advanced = sll_advance_inner(&exp, token, all_rules, lit_tokens, 0);
560+
for let a of advanced {
561+
if a.pos != -1 {
562+
expanded_configs.append(a);
563+
}
564+
}
565+
}
566+
break;
567+
}
568+
}
569+
}
570+
if expanded_configs.is_empty() {
571+
return null;
572+
}
573+
// Combine non-opaque (already advanced) with expanded configs.
574+
// All configs have consumed the same number of input tokens (depth),
575+
// so their FIRST sets are all at the same lookahead level (depth).
576+
let mut all_configs: Array<SllConfig> = [];
577+
for let c of non_opaque_configs {
578+
all_configs.append(sll_config_clone(c));
579+
}
580+
for let ec of expanded_configs {
581+
all_configs.append(ec);
582+
}
583+
// Compute FIRST for each config at the decision point level.
584+
let mut all_tokens: Array<String> = [];
585+
let mut config_firsts: Array<Array<String>> = [];
586+
for let c of all_configs {
587+
let first = sll_config_first(&c, all_rules, lit_tokens);
588+
config_firsts.append(first);
589+
for let tk of first {
590+
if !array_contains_str(&all_tokens, &tk) {
591+
all_tokens.append(tk);
592+
}
593+
}
594+
}
595+
// Build flat Dispatch: for each token, find which alt_indices match.
596+
let mut branches: Array<PredictionBranch> = [];
597+
let mut has_improvement = false;
598+
for let mut t = 0; t < all_tokens.len(); t += 1 {
599+
let tk = &all_tokens[t];
600+
let mut token_alts: Array<i32> = [];
601+
for let mut i = 0; i < all_configs.len(); i += 1 {
602+
if array_contains_str(&config_firsts[i], tk) {
603+
if !array_contains_i32(&token_alts, all_configs[i].alt_index) {
604+
token_alts.append(all_configs[i].alt_index);
605+
}
606+
}
607+
}
608+
let child = if token_alts.len() == 1 {
609+
has_improvement = true;
610+
PredictionNode::Leaf(token_alts[0]);
611+
} else {
612+
PredictionNode::Backtrack(token_alts.sorted());
613+
};
614+
let mut merged = false;
615+
for let mut b = 0; b < branches.len(); b += 1 {
616+
if prediction_node_eq(&branches[b].child, &child) {
617+
branches[b].tokens.append(*tk);
618+
merged = true;
619+
}
620+
}
621+
if !merged {
622+
branches.append(PredictionBranch { tokens: [*tk], child });
623+
}
624+
}
625+
if !has_improvement || branches.is_empty() {
626+
return null;
627+
}
628+
// Safety: verify all original opaque alts appear in at least one branch.
629+
let mut covered_alts: Array<i32> = [];
630+
for let b of branches {
631+
if let Leaf(idx) = b.child {
632+
if !array_contains_i32(&covered_alts, idx) {
633+
covered_alts.append(idx);
634+
}
635+
}
636+
if let Backtrack(idxs) = b.child {
637+
for let idx of idxs {
638+
if !array_contains_i32(&covered_alts, idx) {
639+
covered_alts.append(idx);
640+
}
641+
}
642+
}
643+
}
644+
for let oa of opaque_alts {
645+
if !array_contains_i32(&covered_alts, oa) {
646+
return null;
647+
}
648+
}
649+
return Option::<PredictionNode>::Some(PredictionNode::Dispatch(PredictionDispatch { depth, branches }));
650+
}
651+
482652

483653
/// Check if all configs are at the same terminal element (for Consume).
484654
fn sll_find_common_terminal(configs: &Array<SllConfig>) -> Option<Element> {

package-gale/tests/golden/sqlite.wado

Lines changed: 42 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -6244,83 +6244,64 @@ fn parse_sql_stmt(p: &mut Parser) -> Result<SqlStmtNode, ParseError> {
62446244
}
62456245
} else if grp_kind == TK_K_CREATE {
62466246
if p.peek_kind() == TK_K_CREATE {
6247-
let saved_pos_5 = p.pos;
6248-
let mut bt_done_5 = false;
6249-
if !bt_done_5 {
6250-
bt_try_14: {
6251-
let opt_r = parse_create_index_stmt(p);
6252-
if let Err(_) = opt_r { p.pos = saved_pos_5; break bt_try_14; }
6253-
bt_done_5 = true;
6254-
}
6255-
}
6256-
if !bt_done_5 {
6257-
bt_try_15: {
6258-
let opt_r = parse_create_table_stmt(p);
6259-
if let Err(_) = opt_r { p.pos = saved_pos_5; break bt_try_15; }
6260-
bt_done_5 = true;
6247+
if p.peek_at(1) == TK_K_UNIQUE || p.peek_at(1) == TK_K_INDEX {
6248+
let tok_11 = parse_create_index_stmt(p)?;
6249+
} else if p.peek_at(1) == TK_K_TEMP || p.peek_at(1) == TK_K_TEMPORARY {
6250+
let saved_pos_5 = p.pos;
6251+
let mut bt_done_5 = false;
6252+
if !bt_done_5 {
6253+
bt_try_14: {
6254+
let opt_r = parse_create_table_stmt(p);
6255+
if let Err(_) = opt_r { p.pos = saved_pos_5; break bt_try_14; }
6256+
bt_done_5 = true;
6257+
}
62616258
}
6262-
}
6263-
if !bt_done_5 {
6264-
bt_try_16: {
6265-
let opt_r = parse_create_trigger_stmt(p);
6266-
if let Err(_) = opt_r { p.pos = saved_pos_5; break bt_try_16; }
6267-
bt_done_5 = true;
6259+
if !bt_done_5 {
6260+
bt_try_15: {
6261+
let opt_r = parse_create_trigger_stmt(p);
6262+
if let Err(_) = opt_r { p.pos = saved_pos_5; break bt_try_15; }
6263+
bt_done_5 = true;
6264+
}
62686265
}
6269-
}
6270-
if !bt_done_5 {
6271-
bt_try_17: {
6272-
let opt_r = parse_create_view_stmt(p);
6273-
if let Err(_) = opt_r { p.pos = saved_pos_5; break bt_try_17; }
6274-
bt_done_5 = true;
6266+
if !bt_done_5 {
6267+
let tok = parse_create_view_stmt(p)?;
62756268
}
6276-
}
6277-
if !bt_done_5 {
6278-
let tok = parse_create_virtual_table_stmt(p)?;
6269+
} else if p.peek_at(1) == TK_K_TABLE {
6270+
let tok_12 = parse_create_table_stmt(p)?;
6271+
} else if p.peek_at(1) == TK_K_TRIGGER {
6272+
let tok_13 = parse_create_trigger_stmt(p)?;
6273+
} else if p.peek_at(1) == TK_K_VIEW {
6274+
let tok_14 = parse_create_view_stmt(p)?;
6275+
} else if p.peek_at(1) == TK_K_VIRTUAL {
6276+
let tok_15 = parse_create_virtual_table_stmt(p)?;
62796277
}
62806278
}
62816279
} else if grp_kind == TK_K_DETACH {
6282-
let tok_11 = parse_detach_stmt(p)?;
6280+
let tok_16 = parse_detach_stmt(p)?;
62836281
} else if grp_kind == TK_K_DROP {
62846282
if p.peek_kind() == TK_K_DROP {
6285-
let saved_pos_6 = p.pos;
6286-
let mut bt_done_6 = false;
6287-
if !bt_done_6 {
6288-
bt_try_18: {
6289-
let opt_r = parse_drop_index_stmt(p);
6290-
if let Err(_) = opt_r { p.pos = saved_pos_6; break bt_try_18; }
6291-
bt_done_6 = true;
6292-
}
6293-
}
6294-
if !bt_done_6 {
6295-
bt_try_19: {
6296-
let opt_r = parse_drop_table_stmt(p);
6297-
if let Err(_) = opt_r { p.pos = saved_pos_6; break bt_try_19; }
6298-
bt_done_6 = true;
6299-
}
6300-
}
6301-
if !bt_done_6 {
6302-
bt_try_20: {
6303-
let opt_r = parse_drop_trigger_stmt(p);
6304-
if let Err(_) = opt_r { p.pos = saved_pos_6; break bt_try_20; }
6305-
bt_done_6 = true;
6306-
}
6307-
}
6308-
if !bt_done_6 {
6309-
let tok = parse_drop_view_stmt(p)?;
6283+
if p.peek_at(1) == TK_K_INDEX {
6284+
let tok_17 = parse_drop_index_stmt(p)?;
6285+
} else if p.peek_at(1) == TK_K_TABLE {
6286+
let tok_18 = parse_drop_table_stmt(p)?;
6287+
} else if p.peek_at(1) == TK_K_TRIGGER {
6288+
let tok_19 = parse_drop_trigger_stmt(p)?;
6289+
} else if p.peek_at(1) == TK_K_VIEW {
6290+
let tok_20 = parse_drop_view_stmt(p)?;
63106291
}
63116292
}
63126293
} else if grp_kind == TK_K_PRAGMA {
6313-
let tok_12 = parse_pragma_stmt(p)?;
6294+
let tok_21 = parse_pragma_stmt(p)?;
63146295
} else if grp_kind == TK_K_REINDEX {
6315-
let tok_13 = parse_reindex_stmt(p)?;
6296+
let tok_22 = parse_reindex_stmt(p)?;
63166297
} else if grp_kind == TK_K_RELEASE {
6317-
let tok_14 = parse_release_stmt(p)?;
6298+
let tok_23 = parse_release_stmt(p)?;
63186299
} else if grp_kind == TK_K_ROLLBACK {
6319-
let tok_15 = parse_rollback_stmt(p)?;
6300+
let tok_24 = parse_rollback_stmt(p)?;
63206301
} else if grp_kind == TK_K_SAVEPOINT {
6321-
let tok_16 = parse_savepoint_stmt(p)?;
6302+
let tok_25 = parse_savepoint_stmt(p)?;
63226303
} else if grp_kind == TK_K_VACUUM {
6323-
let tok_17 = parse_vacuum_stmt(p)?;
6304+
let tok_26 = parse_vacuum_stmt(p)?;
63246305
}
63256306
return Result::<SqlStmtNode, ParseError>::Ok(SqlStmtNode {
63266307
span: Span::new(start, p.last_end()),

package-gale/tests/integration_test.wado

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ test "generate sexpression golden" {
2525
assert output == expected, `golden mismatch:\n---actual (len={output.len()})---\n{output}\n---expected (len={expected.len()})---\n{expected}`;
2626
}
2727

28-
#[timeout_ms(120000)]
28+
#[timeout_ms(240000)]
2929
test "generate sqlite golden" {
3030
let input = #include_str("./grammars/SQLite.g4");
3131
let grammar = parse(input).unwrap();

0 commit comments

Comments
 (0)