Skip to content

Commit 5b8974c

Browse files
authored
Merge pull request #732 from wado-lang/claude/remove-store-option-ROr85
Remove `store` parameter from `gen_element`
2 parents c49e461 + 0d12a6d commit 5b8974c

File tree

5 files changed

+730
-748
lines changed

5 files changed

+730
-748
lines changed

package-gale/TODO.md

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,5 @@
11
# Gale TODO
22

3-
## Remove `store` parameter from `gen_element`
4-
5-
`gen_element` has a `store: bool` parameter that controls generated variable naming:
6-
7-
- `store=true`: `let field_name = parse_xxx(p)?;` (meaningful name for struct field assignment)
8-
- `store=false`: `let tok = parse_xxx(p)?;` (throwaway name)
9-
10-
`store=false` is a workaround for a naming collision problem in `dedup_name`. The underlying issue is that `dedup_name` assumes sequential variable creation, but branching code (optionals, groups, prediction trees) creates variables in different scopes:
11-
12-
```
13-
// gen_optional_with_lookahead generates:
14-
if condition {
15-
let k_limit = p.expect(...)?;
16-
let expr = parse_expr(p)?; // dedup count 0
17-
if grp_kind == TK_K_OFFSET { ... }
18-
let expr = parse_expr(p)?; // dedup count 0 again → collision!
19-
}
20-
```
21-
22-
With `store=false`, both become `let tok = ...` / `let tok_2 = ...` (separate namespace), avoiding the collision. But this is a hack — all generated variables should use meaningful names.
23-
24-
### Root cause
25-
26-
`dedup_name` tracks a flat counter per name. Optional/group paths increment this counter even though their variables are scoped to if-blocks. When mandatory elements follow, `gen_field_assignments` expects names at count N but the parsing code produced count N+K (shifted by optional paths).
27-
28-
### Fix approach
29-
30-
1. **Scope-aware naming**: Track which variables are in which scope. Variables inside if/else blocks don't affect the parent scope's counter.
31-
2. **Or**: Make `gen_field_assignments` use the same `name_counts` instance that the parsing code used, instead of replaying from scratch.
32-
3. **Or**: Wrap all non-field parsing (groups, optionals, prediction branches) in labeled scope blocks (`__scope: { ... }`) and use fresh `name_counts` inside. This isolates variable names to their scope.
33-
34-
Approach 3 was partially implemented but incomplete — optional paths that share elements with the enclosing alt (e.g., `LIMIT expr ((OFFSET|',') expr)?` where `expr` appears both in the alt and inside the optional) need the parent's dedup counter to produce `expr_2` for the second occurrence.
35-
36-
A complete fix likely requires combining approaches: scope blocks for isolation + parent counter awareness for shared element types.
37-
383
## Incomplete CST walker coverage
394

405
The XML unparse output is missing tokens/nodes for several patterns due to `store=false` and non-simple groups:

package-gale/src/parser_gen.wado

Lines changed: 47 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1265,7 +1265,7 @@ fn gen_alt_body_skip(w: &mut CodeWriter, type_name: &String, variant_case: Optio
12651265
gen_repeat(w, &rep, all_rules, lit_tokens, &mut name_counts);
12661266
}
12671267
} else {
1268-
gen_element(w, elem, all_rules, lit_tokens, true, &mut name_counts);
1268+
gen_element(w, elem, all_rules, lit_tokens, &mut name_counts);
12691269
}
12701270
}
12711271

@@ -1349,7 +1349,7 @@ fn gen_alt_elements(w: &mut CodeWriter, elements: &Array<Element>, all_rules: &A
13491349
gen_repeat(w, &rep, all_rules, lit_tokens, &mut name_counts);
13501350
}
13511351
} else {
1352-
gen_element(w, elem, all_rules, lit_tokens, true, &mut name_counts);
1352+
gen_element(w, elem, all_rules, lit_tokens, &mut name_counts);
13531353
}
13541354
}
13551355
}
@@ -1363,10 +1363,10 @@ fn gen_elements_with_non_greedy(w: &mut CodeWriter, elements: &Array<Element>, a
13631363
let follow_second = compute_follow_second(elements, i + 1, all_rules, lit_tokens);
13641364
gen_repeat_non_greedy(w, &rep, all_rules, lit_tokens, &follow, &follow_second, name_counts);
13651365
} else {
1366-
gen_element(w, elem, all_rules, lit_tokens, false, name_counts);
1366+
gen_element(w, elem, all_rules, lit_tokens, name_counts);
13671367
}
13681368
} else {
1369-
gen_element(w, elem, all_rules, lit_tokens, false, name_counts);
1369+
gen_element(w, elem, all_rules, lit_tokens, name_counts);
13701370
}
13711371
}
13721372
}
@@ -1381,55 +1381,40 @@ fn is_ruleref_optional(elem: &Element) -> bool {
13811381
return false;
13821382
}
13831383

1384-
fn gen_element(w: &mut CodeWriter, elem: &Element, all_rules: &Array<ParserRule>, lit_tokens: &Array<LitToken>, store: bool, name_counts: &mut Array<[String, i32]>) {
1384+
fn gen_element(w: &mut CodeWriter, elem: &Element, all_rules: &Array<ParserRule>, lit_tokens: &Array<LitToken>, name_counts: &mut Array<[String, i32]>) {
13851385
if let TokenRef(name) = *elem {
13861386
let const_name = `TK_{name}`;
1387-
if store {
1388-
let field_name = dedup_name(&to_lower(&name), name_counts);
1389-
w.line(`let {field_name} = p.expect({const_name}, {escape_string(&name)})?;`);
1390-
} else {
1391-
let discard = dedup_name(&"tok", name_counts);
1392-
w.line(`let {discard} = p.expect({const_name}, {escape_string(&name)})?;`);
1393-
}
1387+
let field_name = dedup_name(&to_lower(&name), name_counts);
1388+
w.line(`let {field_name} = p.expect({const_name}, {escape_string(&name)})?;`);
13941389
return;
13951390
}
13961391
if let Literal(text) = *elem {
13971392
let const_name = literal_const_name(&text);
13981393
let display = `'{text}'`;
1399-
if store {
1400-
let field_name = dedup_name(&literal_field_name(&text), name_counts);
1401-
w.line(`let {field_name} = p.expect({const_name}, {escape_string(&display)})?;`);
1402-
} else {
1403-
let discard = dedup_name(&"tok", name_counts);
1404-
w.line(`let {discard} = p.expect({const_name}, {escape_string(&display)})?;`);
1405-
}
1394+
let field_name = dedup_name(&literal_field_name(&text), name_counts);
1395+
w.line(`let {field_name} = p.expect({const_name}, {escape_string(&display)})?;`);
14061396
return;
14071397
}
14081398
if let RuleRef(name) = *elem {
14091399
let fn_name = `parse_{to_snake_case(&name)}`;
1410-
if store {
1411-
let field_name = dedup_name(&to_snake_case(&name), name_counts);
1412-
w.line(`let {field_name} = {fn_name}(p)?;`);
1413-
} else {
1414-
let discard = dedup_name(&"tok", name_counts);
1415-
w.line(`let {discard} = {fn_name}(p)?;`);
1416-
}
1400+
let field_name = dedup_name(&to_snake_case(&name), name_counts);
1401+
w.line(`let {field_name} = {fn_name}(p)?;`);
14171402
return;
14181403
}
14191404
if let Repeat(rep) = *elem {
14201405
gen_repeat(w, &rep, all_rules, lit_tokens, name_counts);
14211406
return;
14221407
}
14231408
if let Group(alts) = *elem {
1424-
if store && is_simple_cst_group(&alts) {
1409+
if is_simple_cst_group(&alts) {
14251410
gen_consume_group_store_optional(w, &alts, all_rules, lit_tokens, name_counts);
14261411
} else {
14271412
gen_consume_group(w, &alts, all_rules, lit_tokens, name_counts);
14281413
}
14291414
return;
14301415
}
14311416
if let Label(lab) = *elem {
1432-
gen_element(w, &lab.element, all_rules, lit_tokens, store, name_counts);
1417+
gen_element(w, &lab.element, all_rules, lit_tokens, name_counts);
14331418
return;
14341419
}
14351420
}
@@ -1450,7 +1435,7 @@ fn gen_repeat(w: &mut CodeWriter, rep: &RepeatElement, all_rules: &Array<ParserR
14501435
if is_group_store {
14511436
if let Group(alts) = *inner { gen_consume_group_store(w, &alts, all_rules, lit_tokens, &mut nc); }
14521437
} else {
1453-
gen_element(w, inner, all_rules, lit_tokens, true, &mut nc);
1438+
gen_element(w, inner, all_rules, lit_tokens, &mut nc);
14541439
}
14551440
w.line(`{list_name}.append({field_info.0});`);
14561441
w.end();
@@ -1460,7 +1445,7 @@ fn gen_repeat(w: &mut CodeWriter, rep: &RepeatElement, all_rules: &Array<ParserR
14601445
if is_group_store {
14611446
if let Group(alts) = *inner { gen_consume_group_store(w, &alts, all_rules, lit_tokens, &mut nc); }
14621447
} else {
1463-
gen_element(w, inner, all_rules, lit_tokens, true, &mut nc);
1448+
gen_element(w, inner, all_rules, lit_tokens, &mut nc);
14641449
}
14651450
let list_name = dedup_name(&`{field_info.0}_list`, name_counts);
14661451
w.line(`let mut {list_name}: Array<{field_info.1}> = [{field_info.0}];`);
@@ -1469,7 +1454,7 @@ fn gen_repeat(w: &mut CodeWriter, rep: &RepeatElement, all_rules: &Array<ParserR
14691454
if is_group_store {
14701455
if let Group(alts) = *inner { gen_consume_group_store(w, &alts, all_rules, lit_tokens, &mut nc2); }
14711456
} else {
1472-
gen_element(w, inner, all_rules, lit_tokens, true, &mut nc2);
1457+
gen_element(w, inner, all_rules, lit_tokens, &mut nc2);
14731458
}
14741459
w.line(`{list_name}.append({field_info.0});`);
14751460
w.end();
@@ -1482,7 +1467,7 @@ fn gen_repeat(w: &mut CodeWriter, rep: &RepeatElement, all_rules: &Array<ParserR
14821467
if is_group_store {
14831468
if let Group(alts) = *inner { gen_consume_group_store(w, &alts, all_rules, lit_tokens, &mut nc); }
14841469
} else {
1485-
gen_element(w, inner, all_rules, lit_tokens, true, &mut nc);
1470+
gen_element(w, inner, all_rules, lit_tokens, &mut nc);
14861471
}
14871472
w.line(`Option::<{type_str}>::Some({field_info.0})`);
14881473
w.else_begin("else");
@@ -1493,14 +1478,14 @@ fn gen_repeat(w: &mut CodeWriter, rep: &RepeatElement, all_rules: &Array<ParserR
14931478
if let Star = rep.kind {
14941479
w.begin(`while {first_check_str(&first)}`);
14951480
let mut nc: Array<[String, i32]> = [];
1496-
gen_element(w, inner, all_rules, lit_tokens, false, &mut nc);
1481+
gen_element(w, inner, all_rules, lit_tokens, &mut nc);
14971482
w.end();
14981483
}
14991484
if let Plus = rep.kind {
1500-
gen_element(w, inner, all_rules, lit_tokens, false, name_counts);
1485+
gen_element(w, inner, all_rules, lit_tokens, name_counts);
15011486
w.begin(`while {first_check_str(&first)}`);
15021487
let mut nc: Array<[String, i32]> = [];
1503-
gen_element(w, inner, all_rules, lit_tokens, false, &mut nc);
1488+
gen_element(w, inner, all_rules, lit_tokens, &mut nc);
15041489
w.end();
15051490
}
15061491
if let Optional = rep.kind {
@@ -1578,13 +1563,13 @@ fn gen_repeat_non_greedy(w: &mut CodeWriter, rep: &RepeatElement, all_rules: &Ar
15781563
let field_info = element_field_info(inner);
15791564
if is_plus {
15801565
let mut nc: Array<[String, i32]> = [];
1581-
gen_element(w, inner, all_rules, lit_tokens, true, &mut nc);
1566+
gen_element(w, inner, all_rules, lit_tokens, &mut nc);
15821567
let list_name = dedup_name(&`{field_info.0}_list`, name_counts);
15831568
w.line(`let mut {list_name}: Array<{field_info.1}> = [{field_info.0}];`);
15841569
if while_cond != "false" {
15851570
w.begin(`while {while_cond}`);
15861571
let mut nc2: Array<[String, i32]> = [];
1587-
gen_element(w, inner, all_rules, lit_tokens, true, &mut nc2);
1572+
gen_element(w, inner, all_rules, lit_tokens, &mut nc2);
15881573
w.line(`{list_name}.append({field_info.0});`);
15891574
w.end();
15901575
}
@@ -1594,19 +1579,19 @@ fn gen_repeat_non_greedy(w: &mut CodeWriter, rep: &RepeatElement, all_rules: &Ar
15941579
if while_cond != "false" {
15951580
w.begin(`while {while_cond}`);
15961581
let mut nc: Array<[String, i32]> = [];
1597-
gen_element(w, inner, all_rules, lit_tokens, true, &mut nc);
1582+
gen_element(w, inner, all_rules, lit_tokens, &mut nc);
15981583
w.line(`{list_name}.append({field_info.0});`);
15991584
w.end();
16001585
}
16011586
}
16021587
} else {
16031588
if is_plus {
1604-
gen_element(w, inner, all_rules, lit_tokens, false, name_counts);
1589+
gen_element(w, inner, all_rules, lit_tokens, name_counts);
16051590
}
16061591
if while_cond != "false" {
16071592
w.begin(`while {while_cond}`);
16081593
let mut nc: Array<[String, i32]> = [];
1609-
gen_element(w, inner, all_rules, lit_tokens, false, &mut nc);
1594+
gen_element(w, inner, all_rules, lit_tokens, &mut nc);
16101595
w.end();
16111596
}
16121597
}
@@ -1658,7 +1643,7 @@ fn gen_repeat_with_follow(w: &mut CodeWriter, rep: &RepeatElement, all_rules: &A
16581643
let opt_name = dedup_name(&field_info.0, name_counts);
16591644
w.begin(`let {opt_name}: Option<{type_str}> = if {first_check_str(&first)}`);
16601645
let mut nc: Array<[String, i32]> = [];
1661-
gen_element(w, inner, all_rules, lit_tokens, true, &mut nc);
1646+
gen_element(w, inner, all_rules, lit_tokens, &mut nc);
16621647
w.line(`Option::<{type_str}>::Some({field_info.0})`);
16631648
w.else_begin("else");
16641649
w.line("null");
@@ -1804,20 +1789,21 @@ fn gen_optional_with_lookahead(w: &mut CodeWriter, group_elements: &Array<Elemen
18041789
}
18051790

18061791
// Generate parsing code for this shape's elements
1792+
let mut nc: Array<[String, i32]> = [];
18071793
for let mut k = 0; k < shape.len(); k += 1 {
18081794
let idx = shape[k];
18091795
let elem = &group_elements[idx];
18101796
let actual = unwrap_optional(elem);
18111797
if let Group(alts) = *actual {
18121798
if alts.len() == 1 {
18131799
for let mut j = 0; j < alts[0].elements.len(); j += 1 {
1814-
gen_element(w, &alts[0].elements[j], all_rules, lit_tokens, false, name_counts);
1800+
gen_element(w, &alts[0].elements[j], all_rules, lit_tokens, &mut nc);
18151801
}
18161802
} else {
1817-
gen_element(w, actual, all_rules, lit_tokens, false, name_counts);
1803+
gen_element(w, actual, all_rules, lit_tokens, &mut nc);
18181804
}
18191805
} else {
1820-
gen_element(w, actual, all_rules, lit_tokens, false, name_counts);
1806+
gen_element(w, actual, all_rules, lit_tokens, &mut nc);
18211807
}
18221808
}
18231809
}
@@ -1915,12 +1901,13 @@ fn gen_optional_with_backtrack(w: &mut CodeWriter, inner: &Element, all_rules: &
19151901
cond.append(cond_parts[i]);
19161902
}
19171903
w.begin(`if {cond}`);
1904+
let mut nc: Array<[String, i32]> = [];
19181905
if let Group(alts) = *inner {
19191906
for let mut i = 0; i < alts[0].elements.len(); i += 1 {
1920-
gen_element(w, &alts[0].elements[i], all_rules, lit_tokens, false, name_counts);
1907+
gen_element(w, &alts[0].elements[i], all_rules, lit_tokens, &mut nc);
19211908
}
19221909
} else {
1923-
gen_element(w, inner, all_rules, lit_tokens, false, name_counts);
1910+
gen_element(w, inner, all_rules, lit_tokens, &mut nc);
19241911
}
19251912
w.end();
19261913
return;
@@ -1933,17 +1920,17 @@ fn gen_optional_with_backtrack(w: &mut CodeWriter, inner: &Element, all_rules: &
19331920
w.line(`let {saved} = p.pos;`);
19341921
w.begin(`{label}:`);
19351922

1923+
let mut bt_nc: Array<[String, i32]> = [];
19361924
if let Group(alts) = *inner {
19371925
if alts.len() == 1 {
19381926
for let elem of alts[0].elements {
1939-
gen_element_failable(w, &elem, all_rules, lit_tokens, &saved, &label, name_counts);
1927+
gen_element_failable(w, &elem, all_rules, lit_tokens, &saved, &label, &mut bt_nc);
19401928
}
19411929
} else {
1942-
let mut nc: Array<[String, i32]> = [];
1943-
gen_element(w, inner, all_rules, lit_tokens, false, &mut nc);
1930+
gen_element(w, inner, all_rules, lit_tokens, &mut bt_nc);
19441931
}
19451932
} else {
1946-
gen_element_failable(w, inner, all_rules, lit_tokens, &saved, &label, name_counts);
1933+
gen_element_failable(w, inner, all_rules, lit_tokens, &saved, &label, &mut bt_nc);
19471934
}
19481935

19491936
w.end(); // label
@@ -2169,11 +2156,13 @@ fn gen_consume_group(w: &mut CodeWriter, alts: &Array<Alternative>, all_rules: &
21692156

21702157
if group.len() == 1 {
21712158
let alt_idx = group[0];
2172-
gen_elements_with_non_greedy(w, &alts[alt_idx].elements, all_rules, lit_tokens, name_counts);
2159+
let mut nc: Array<[String, i32]> = [];
2160+
gen_elements_with_non_greedy(w, &alts[alt_idx].elements, all_rules, lit_tokens, &mut nc);
21732161
} else {
21742162
let mut sorted_group = group.sorted();
21752163
sort_group_by_element_count(&mut sorted_group, alts);
2176-
gen_backtracking_body(w, &sorted_group, alts, all_rules, lit_tokens, name_counts);
2164+
let mut nc: Array<[String, i32]> = [];
2165+
gen_backtracking_body(w, &sorted_group, alts, all_rules, lit_tokens, &mut nc);
21772166
}
21782167
}
21792168
w.end();
@@ -2422,12 +2411,12 @@ fn gen_group_prediction_code_skip(w: &mut CodeWriter, node: &PredictionNode, alt
24222411
continue;
24232412
}
24242413
}
2425-
gen_element(w, &elem, all_rules, lit_tokens, false, name_counts);
2414+
gen_element(w, &elem, all_rules, lit_tokens, name_counts);
24262415
}
24272416
return;
24282417
}
24292418
if let Consume(c) = *node {
2430-
gen_element(w, &c.element, all_rules, lit_tokens, false, name_counts);
2419+
gen_element(w, &c.element, all_rules, lit_tokens, name_counts);
24312420
gen_group_prediction_code_skip(w, &c.child, alts, all_rules, lit_tokens, name_counts, skip + 1);
24322421
return;
24332422
}
@@ -2446,7 +2435,7 @@ fn gen_group_prediction_code_skip(w: &mut CodeWriter, node: &PredictionNode, alt
24462435
w.begin(`if !{done_var}`);
24472436
let mut nc: Array<[String, i32]> = [];
24482437
for let mut i = skip; i < alt.elements.len(); i += 1 {
2449-
gen_element(w, &alt.elements[i], all_rules, lit_tokens, false, &mut nc);
2438+
gen_element(w, &alt.elements[i], all_rules, lit_tokens, &mut nc);
24502439
}
24512440
w.end();
24522441
} else {
@@ -2580,12 +2569,12 @@ fn gen_lr_suffix_helpers(w: &mut CodeWriter, rule: &ParserRule, type_name: &Stri
25802569
let discard = dedup_name(&"op_tok", &mut name_counts);
25812570
w.line(`let {discard} = p.advance();`);
25822571
} else {
2583-
gen_element(w, elem, all_rules, lit_tokens, false, &mut name_counts);
2572+
gen_element(w, elem, all_rules, lit_tokens, &mut name_counts);
25842573
}
25852574
} else if let Repeat(rep) = *elem {
25862575
gen_repeat(w, &rep, all_rules, lit_tokens, &mut name_counts);
25872576
} else {
2588-
gen_element(w, elem, all_rules, lit_tokens, true, &mut name_counts);
2577+
gen_element(w, elem, all_rules, lit_tokens, &mut name_counts);
25892578
}
25902579
}
25912580

@@ -2889,7 +2878,7 @@ fn gen_prediction_code_inner(w: &mut CodeWriter, node: &PredictionNode, fn_name:
28892878
return;
28902879
}
28912880
if let Consume(c) = *node {
2892-
gen_element(w, &c.element, all_rules, lit_tokens, true, name_counts);
2881+
gen_element(w, &c.element, all_rules, lit_tokens, name_counts);
28932882
gen_prediction_code_inner(w, &c.child, fn_name, alts, type_name, all_rules, lit_tokens, skip + 1, name_counts);
28942883
return;
28952884
}

0 commit comments

Comments
 (0)