Skip to content
13 changes: 0 additions & 13 deletions package-gale/TODO.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,5 @@
# Gale TODO

## Incomplete CST walker coverage

The XML unparse output is missing tokens/nodes for several patterns due to `store=false` and non-simple groups:

- **Repeated separators**: `(',' result_column)*` — the `','` and subsequent `result_column` are not stored, so `SELECT a, b` only shows `a`.
- **Token-only groups**: `(K_INSERT | K_REPLACE)` — groups whose alternatives are all TokenRefs do not satisfy `is_simple_cst_group` (which requires RuleRef alternatives), so the `INSERT` keyword is missing from `insert_stmt`.
- **Multi-element single-alt groups**: `(';'+ sql_stmt)*` — the Star's inner group has two elements (a Plus and a RuleRef), so it is not a simple CST group and statements after the first are not stored.

Fixing these requires either:

1. Extending `is_simple_cst_group` to handle token-only and mixed groups (generating variant types for tokens too).
2. Making `gen_repeat` for Star/Plus store all inner elements (this requires struct types for group alternatives with multiple elements).

## Code Quality

### parser_gen.wado
Expand Down
2 changes: 1 addition & 1 deletion package-gale/src/g4/integration_test.wado
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ test "parse HTMLLexer.g4" {
found_tag_close = true;
}
if rule.name == "TAG_OPEN" {
assert rule.mode == 0; // DEFAULT_MODE
assert rule.mode == 0; // DEFAULT_MODE
assert rule.push_mode >= 0, "TAG_OPEN should have pushMode(TAG)";
assert g.mode_names[rule.push_mode] == "TAG";
}
Expand Down
4 changes: 3 additions & 1 deletion package-gale/src/g4/parser.wado
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,9 @@ impl G4Parser {
if self.is_kind(G4Token::Question) {
self.advance();
return Result::<LexerElement, ParseError>::Ok(
LexerElement::Repeat(LexerRepeatElement { kind: RepeatKind::Optional, element: elem, non_greedy: false }),
LexerElement::Repeat(
LexerRepeatElement { kind: RepeatKind::Optional, element: elem, non_greedy: false },
),
);
}
return Result::<LexerElement, ParseError>::Ok(elem);
Expand Down
79 changes: 78 additions & 1 deletion package-gale/src/gen_context.wado
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use { ParserRule, Alternative, Element, RepeatElement, LabelElement, RepeatKind } from "./ir.wado";
use { LitToken } from "./lexer_gen.wado";
use { literal_const_name, str_set_from } from "./gen_util.wado";
use { literal_const_name, str_set_from, is_single_alt_group, is_general_group, single_alt_group_type_name, count_general_groups } from "./gen_util.wado";
use { TreeMap } from "core:collections";

pub struct GenContext {
Expand All @@ -9,6 +9,12 @@ pub struct GenContext {
rule_index: TreeMap<String, i32>,
first_cache: TreeMap<String, Array<String>>,
single_token_cache: TreeMap<String, bool>,
pub current_rule_name: String,
pub group_counter: i32,
pub alt_gc_starts: Array<i32>,
/// Maps single-alt group type name → the rule name that first defines it.
/// Used to resolve general group type names inside shared single-alt groups.
single_alt_owner: TreeMap<String, String>,
}

impl GenContext {
Expand All @@ -17,12 +23,17 @@ impl GenContext {
for let mut i = 0; i < parser_rules.len(); i += 1 {
rule_index[parser_rules[i].name] = i;
}
let single_alt_owner = build_single_alt_owner_map(parser_rules);
return GenContext {
parser_rules: *parser_rules,
lit_tokens: *lit_tokens,
rule_index,
first_cache: TreeMap::<String, Array<String>>::new(),
single_token_cache: TreeMap::<String, bool>::new(),
current_rule_name: "",
group_counter: 0,
alt_gc_starts: [],
single_alt_owner,
};
}

Expand Down Expand Up @@ -195,6 +206,72 @@ impl GenContext {

return result;
}

/// Resolve which rule name should be used for general group types that live
/// inside the single-alt group named `single_alt_type_name`.
///
/// Looks the type name up in `single_alt_owner`; when an owner is recorded it
/// is returned, otherwise the context's `current_rule_name` is returned.
pub fn general_group_rule_name(&self, single_alt_type_name: &String) -> String {
    let recorded_owner = self.single_alt_owner.get(*single_alt_type_name);
    if let Some(owner) = recorded_owner {
        return owner;
    }
    // No recorded owner for this type name: fall back to the rule being generated.
    return self.current_rule_name;
}
}

/// Build the "single-alt group type name → owning rule name" table.
///
/// Rules are visited in array order and every alternative of each rule is
/// scanned; the scan only records a type name the first time it is seen, so
/// the earliest rule containing a given single-alt group becomes its owner.
fn build_single_alt_owner_map(parser_rules: &Array<ParserRule>) -> TreeMap<String, String> {
    let mut owners = TreeMap::<String, String>::new();
    for let mut i = 0; i < parser_rules.len(); i += 1 {
        for let alt of &parser_rules[i].alternatives {
            scan_elements_for_single_alt_groups(&alt.elements, &parser_rules[i].name, &mut owners);
        }
    }
    return owners;
}

/// Scan each element of `elements`, in order, for single-alt groups and record
/// newly seen ones in `map` as owned by `rule_name`.
fn scan_elements_for_single_alt_groups(elements: &Array<Element>, rule_name: &String, map: &mut TreeMap<String, String>) {
    for let mut i = 0; i < elements.len(); i += 1 {
        scan_element_for_single_alt_groups(&elements[i], rule_name, map);
    }
}

/// Scan one element (recursively) for single-alt groups.
///
/// - A `Label` is transparent: its inner element is scanned.
/// - A `Group`, or a `Repeat` directly wrapping a `Group`, is inspected:
///   a single-alt group gets its type name recorded in `map` (only if the
///   name is not already present) and its elements are scanned; any other
///   group just has the elements of each alternative scanned.
/// - Any other `Repeat` has its inner element scanned.
/// - Everything else is ignored.
fn scan_element_for_single_alt_groups(elem: &Element, rule_name: &String, map: &mut TreeMap<String, String>) {
    if let Label(labeled) = elem {
        scan_element_for_single_alt_groups(&labeled.element, rule_name, map);
        return;
    }

    let found = extract_group_alts_for_scan(elem);
    if let Some(group_alts) = found {
        if !is_single_alt_group(group_alts) {
            // Not a single-alt group: only look inside each alternative.
            for let inner_alt of group_alts {
                scan_elements_for_single_alt_groups(&inner_alt.elements, rule_name, map);
            }
            return;
        }
        let owner_key = single_alt_group_type_name(&group_alts[0]);
        // First rule to mention this type name keeps ownership.
        if !map.contains_key(owner_key) {
            map[owner_key] = *rule_name;
        }
        // Nested groups inside the single alternative are scanned as well.
        scan_elements_for_single_alt_groups(&group_alts[0].elements, rule_name, map);
        return;
    }

    if let Repeat(repeated) = elem {
        scan_element_for_single_alt_groups(&repeated.element, rule_name, map);
    }
}

/// Return the alternatives of `elem` when it is a `Group`, or a `Repeat` whose
/// immediate inner element is a `Group`; otherwise return `null`.
///
/// Only one `Repeat` layer is looked through here.
fn extract_group_alts_for_scan(elem: &Element) -> Option<&Array<Alternative>> {
    if let Repeat(repeated) = elem {
        if let Group(inner_alts) = &repeated.element {
            return Option::Some(inner_alts);
        }
        return null;
    }
    if let Group(direct_alts) = elem {
        return Option::Some(direct_alts);
    }
    return null;
}

pub fn is_nullable(elem: &Element) -> bool {
Expand Down
96 changes: 93 additions & 3 deletions package-gale/src/gen_util.wado
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use { Alternative } from "./ir.wado";
use { Alternative, Element } from "./ir.wado";
use { TreeSet } from "core:collections";

pub fn to_pascal_case(name: &String) -> String {
Expand Down Expand Up @@ -241,9 +241,99 @@ pub fn is_single_alt_group(alts: &Array<Alternative>) -> bool {
return alts[0].elements.len() >= 2;
}

/// Check if a group has any storable pattern.
///
/// A group is storable when it matches the simple CST, token-only, single-alt,
/// or general pattern. Since `is_general_group` accepts every group with two
/// or more alternatives that is neither simple nor token-only, all multi-alt
/// groups are storable (general groups get variant types).
pub fn is_storable_group(alts: &Array<Alternative>) -> bool {
    // The superseded three-way check used to sit in front of this return and
    // made the general-group case unreachable; only the full check remains.
    return is_simple_cst_group(alts) || is_token_only_group(alts) || is_single_alt_group(alts) || is_general_group(alts);
}

/// Tell whether `alts` forms a general multi-alt group: it has at least two
/// alternatives and is neither a simple CST group nor a token-only group.
pub fn is_general_group(alts: &Array<Alternative>) -> bool {
    if alts.len() >= 2 {
        return !(is_simple_cst_group(alts) || is_token_only_group(alts));
    }
    // Fewer than two alternatives can never be a general group.
    return false;
}

/// Sum the general-group counts of every element in `elements`.
///
/// Each element's contribution comes from `count_element_groups`, which looks
/// through Repeat wrappers; this is meant to match the type gen's counting.
pub fn count_general_groups(elements: &Array<Element>) -> i32 {
    let mut total: i32 = 0;
    for let mut i = 0; i < elements.len(); i += 1 {
        total += count_element_groups(&elements[i]);
    }
    return total;
}

/// Count the general groups contributed by a single element.
///
/// - `Group`: the largest nested count found in any one alternative, plus 1
///   when the group itself is a general group.
/// - `Repeat` / `Label`: transparent wrappers; the count of the wrapped
///   element is returned (so a `Repeat` around a `Group` counts exactly like
///   the bare `Group`).
/// - Anything else contributes 0.
pub fn count_element_groups(elem: &Element) -> i32 {
    if let Group(alts) = elem {
        if is_general_group(alts) {
            // 1 for the group itself + max nested count across alts
            return 1 + max_nested_general_groups(alts);
        }
        // Non-general group: it adds no index of its own, but general groups
        // nested inside its alternatives still count.
        return max_nested_general_groups(alts);
    }
    if let Repeat(rep) = elem {
        return count_element_groups(&rep.element);
    }
    if let Label(lab) = elem {
        return count_element_groups(&lab.element);
    }
    return 0;
}

/// Largest `count_general_groups` result over the alternatives of a group
/// (0 when there are no alternatives or none contains a general group).
// NOTE(review): max (not sum) is taken across alternatives — presumably
// because sibling alternatives share the same index range; confirm against
// the type gen.
fn max_nested_general_groups(alts: &Array<Alternative>) -> i32 {
    let mut max_nested: i32 = 0;
    for let alt of alts {
        let nested = count_general_groups(&alt.elements);
        if nested > max_nested {
            max_nested = nested;
        }
    }
    return max_nested;
}

/// Build the variant type name for a general multi-alt group: the PascalCase
/// form of the parent rule name, then `Group`, then the group index
/// (e.g. "InsertStmtGroup0").
pub fn general_group_type_name(rule_name: &String, group_index: i32) -> String {
    let rule_part = to_pascal_case(rule_name);
    return `{rule_part}Group{group_index}`;
}

/// Build the field base name for a general multi-alt group: `group_` followed
/// by the group index (e.g. "group_0").
pub fn general_group_field_name(group_index: i32) -> String {
    let field_name = `group_{group_index}`;
    return field_name;
}

/// Build the struct name for one alternative of a general group: the group's
/// variant type name followed by `Alt` and the alternative index
/// (e.g. "InsertStmtGroup0Alt1").
pub fn general_group_alt_struct_name(rule_name: &String, group_index: i32, alt_index: i32) -> String {
    let group_type = general_group_type_name(rule_name, group_index);
    return `{group_type}Alt{alt_index}`;
}

/// Get a field name for the elements in a single-alt group's element.
Expand Down
Loading
Loading