Skip to content

Commit c64dd7f

Browse files
committed
Issue 473 using a span on Production.
1 parent a14ad77 commit c64dd7f

File tree

3 files changed

+134
-45
lines changed

3 files changed

+134
-45
lines changed

cfgrammar/src/lib/yacc/ast.rs

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ pub struct Production {
107107
pub symbols: Vec<Symbol>,
108108
pub precedence: Option<String>,
109109
pub action: Option<String>,
110+
pub empty_span: Option<Span>,
110111
}
111112

112113
#[derive(Clone, Debug)]
@@ -185,12 +186,14 @@ impl GrammarAST {
185186
symbols: Vec<Symbol>,
186187
precedence: Option<String>,
187188
action: Option<String>,
189+
empty_span: Option<Span>,
188190
) {
189191
self.rules[&rule_name].pidxs.push(self.prods.len());
190192
self.prods.push(Production {
191193
symbols,
192194
precedence,
193195
action,
196+
empty_span,
194197
});
195198
}
196199

@@ -436,7 +439,7 @@ mod test {
436439
let empty_span = Span::new(0, 0);
437440
grm.start = Some(("A".to_string(), empty_span));
438441
grm.add_rule(("B".to_string(), empty_span), None);
439-
grm.add_prod("B".to_string(), vec![], None, None);
442+
grm.add_prod("B".to_string(), vec![], None, None, None);
440443
match grm.complete_and_validate() {
441444
Err(YaccGrammarError {
442445
kind: YaccGrammarErrorKind::InvalidStartRule(_),
@@ -452,7 +455,7 @@ mod test {
452455
let empty_span = Span::new(0, 0);
453456
grm.start = Some(("A".to_string(), empty_span));
454457
grm.add_rule(("A".to_string(), empty_span), None);
455-
grm.add_prod("A".to_string(), vec![], None, None);
458+
grm.add_prod("A".to_string(), vec![], None, None, None);
456459
assert!(grm.complete_and_validate().is_ok());
457460
}
458461

@@ -463,8 +466,8 @@ mod test {
463466
grm.start = Some(("A".to_string(), empty_span));
464467
grm.add_rule(("A".to_string(), empty_span), None);
465468
grm.add_rule(("B".to_string(), empty_span), None);
466-
grm.add_prod("A".to_string(), vec![rule("B")], None, None);
467-
grm.add_prod("B".to_string(), vec![], None, None);
469+
grm.add_prod("A".to_string(), vec![rule("B")], None, None, None);
470+
grm.add_prod("B".to_string(), vec![], None, None, None);
468471
assert!(grm.complete_and_validate().is_ok());
469472
}
470473

@@ -474,7 +477,7 @@ mod test {
474477
let empty_span = Span::new(0, 0);
475478
grm.start = Some(("A".to_string(), empty_span));
476479
grm.add_rule(("A".to_string(), empty_span), None);
477-
grm.add_prod("A".to_string(), vec![rule("B")], None, None);
480+
grm.add_prod("A".to_string(), vec![rule("B")], None, None, None);
478481
match grm.complete_and_validate() {
479482
Err(YaccGrammarError {
480483
kind: YaccGrammarErrorKind::UnknownRuleRef(_),
@@ -491,7 +494,7 @@ mod test {
491494
grm.tokens.insert("b".to_string());
492495
grm.start = Some(("A".to_string(), empty_span));
493496
grm.add_rule(("A".to_string(), empty_span), None);
494-
grm.add_prod("A".to_string(), vec![token("b")], None, None);
497+
grm.add_prod("A".to_string(), vec![token("b")], None, None, None);
495498
assert!(grm.complete_and_validate().is_ok());
496499
}
497500

@@ -504,7 +507,7 @@ mod test {
504507
grm.tokens.insert("b".to_string());
505508
grm.start = Some(("A".to_string(), empty_span));
506509
grm.add_rule(("A".to_string(), empty_span), None);
507-
grm.add_prod("A".to_string(), vec![rule("b")], None, None);
510+
grm.add_prod("A".to_string(), vec![rule("b")], None, None, None);
508511
assert!(grm.complete_and_validate().is_err());
509512
}
510513

@@ -514,7 +517,7 @@ mod test {
514517
let empty_span = Span::new(0, 0);
515518
grm.start = Some(("A".to_string(), empty_span));
516519
grm.add_rule(("A".to_string(), empty_span), None);
517-
grm.add_prod("A".to_string(), vec![token("b")], None, None);
520+
grm.add_prod("A".to_string(), vec![token("b")], None, None, None);
518521
match grm.complete_and_validate() {
519522
Err(YaccGrammarError {
520523
kind: YaccGrammarErrorKind::UnknownToken(_),
@@ -530,7 +533,13 @@ mod test {
530533
let empty_span = Span::new(0, 0);
531534
grm.start = Some(("A".to_string(), empty_span));
532535
grm.add_rule(("A".to_string(), empty_span), None);
533-
grm.add_prod("A".to_string(), vec![rule("b"), token("b")], None, None);
536+
grm.add_prod(
537+
"A".to_string(),
538+
vec![rule("b"), token("b")],
539+
None,
540+
None,
541+
None,
542+
);
534543
match grm.complete_and_validate() {
535544
Err(YaccGrammarError {
536545
kind: YaccGrammarErrorKind::UnknownRuleRef(_),
@@ -546,7 +555,7 @@ mod test {
546555
let empty_span = Span::new(2, 3);
547556
grm.start = Some(("A".to_string(), empty_span));
548557
grm.add_rule(("A".to_string(), empty_span), None);
549-
grm.add_prod("A".to_string(), vec![], None, None);
558+
grm.add_prod("A".to_string(), vec![], None, None, None);
550559
grm.epp
551560
.insert("k".to_owned(), (empty_span, ("v".to_owned(), empty_span)));
552561
match grm.complete_and_validate() {
@@ -580,6 +589,7 @@ mod test {
580589
vec![token("b")],
581590
Some("b".to_string()),
582591
None,
592+
None,
583593
);
584594
assert!(grm.complete_and_validate().is_ok());
585595
}
@@ -595,6 +605,7 @@ mod test {
595605
vec![token("b")],
596606
Some("b".to_string()),
597607
None,
608+
None,
598609
);
599610
match grm.complete_and_validate() {
600611
Err(YaccGrammarError {
@@ -619,11 +630,11 @@ mod test {
619630
let empty_span = Span::new(0, 0);
620631
grm.start = Some(("A".to_string(), empty_span));
621632
grm.add_rule(("A".to_string(), empty_span), None);
622-
grm.add_prod("A".to_string(), vec![], None, None);
633+
grm.add_prod("A".to_string(), vec![], None, None, None);
623634
grm.tokens.insert("b".to_string());
624635
grm.spans.push(Span::new(4, 5));
625636
grm.add_rule(("B".to_string(), Span::new(1, 2)), None);
626-
grm.add_prod("B".to_string(), vec![token("b")], None, None);
637+
grm.add_prod("B".to_string(), vec![token("b")], None, None, None);
627638

628639
assert_eq!(
629640
grm.unused_symbols()

cfgrammar/src/lib/yacc/parser.rs

Lines changed: 93 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,7 @@ impl YaccParser {
759759
}
760760

761761
fn parse_rule(&mut self, mut i: usize) -> Result<usize, YaccGrammarError> {
762+
let mut empty_span: Option<Span> = None;
762763
let (j, rn) = self.parse_name(i)?;
763764
let span = Span::new(i, j);
764765
if self.ast.start.is_none() {
@@ -803,14 +804,24 @@ impl YaccParser {
803804
i = self.parse_ws(i, true)?;
804805
while i < self.src.len() {
805806
if let Some(j) = self.lookahead_is("|", i) {
806-
self.ast.add_prod(rn.clone(), syms, prec, action);
807+
let span = if syms.is_empty() {
808+
empty_span.take().or(Some(Span::new(i, i)))
809+
} else {
810+
None
811+
};
812+
self.ast.add_prod(rn.clone(), syms, prec, action, span);
807813
syms = Vec::new();
808814
prec = None;
809815
action = None;
810816
i = self.parse_ws(j, true)?;
811817
continue;
812818
} else if let Some(j) = self.lookahead_is(";", i) {
813-
self.ast.add_prod(rn, syms, prec, action);
819+
let empty_span = if syms.is_empty() {
820+
empty_span.take().or(Some(Span::new(i, i)))
821+
} else {
822+
None
823+
};
824+
self.ast.add_prod(rn, syms, prec, action, empty_span);
814825
return Ok(j);
815826
}
816827

@@ -838,18 +849,20 @@ impl YaccParser {
838849
return Err(self.mk_error(YaccGrammarErrorKind::ProductionNotTerminated, i));
839850
}
840851
} else if let Some(mut j) = self.lookahead_is("%empty", i) {
852+
let k = self.parse_ws(j, true)?;
841853
j = self.parse_ws(j, true)?;
842854
// %empty could be followed by all sorts of weird syntax errors: all we try and do
843855
// is say "does this production look like it's finished" and trust that the other
844856
// errors will be caught by other parts of the parser.
845857
if !syms.is_empty()
846-
| !(self.lookahead_is("|", j).is_some()
847-
|| self.lookahead_is(";", j).is_some()
848-
|| self.lookahead_is("{", j).is_some())
858+
| !(self.lookahead_is("|", k).is_some()
859+
|| self.lookahead_is(";", k).is_some()
860+
|| self.lookahead_is("{", k).is_some())
849861
{
850862
return Err(self.mk_error(YaccGrammarErrorKind::NonEmptyProduction, i));
851863
}
852-
i = j;
864+
empty_span = Some(Span::new(i, j));
865+
i = k;
853866
} else {
854867
let (j, sym, span) = self.parse_token(i)?;
855868
if self.ast.tokens.contains(&sym) {
@@ -1322,7 +1335,8 @@ mod test {
13221335
Production {
13231336
symbols: vec![token_span("a", a_span)],
13241337
precedence: None,
1325-
action: None
1338+
action: None,
1339+
empty_span: None,
13261340
}
13271341
);
13281342
assert_eq!(&src[a_span.start()..a_span.end()], "a");
@@ -1347,7 +1361,8 @@ mod test {
13471361
Production {
13481362
symbols: vec![token_span("a", a_span)],
13491363
precedence: None,
1350-
action: None
1364+
action: None,
1365+
empty_span: None,
13511366
}
13521367
);
13531368
assert_eq!(&src[a_span.start()..a_span.end()], "a");
@@ -1357,7 +1372,8 @@ mod test {
13571372
Production {
13581373
symbols: vec![token_span("b", Span::new(54, 55))],
13591374
precedence: None,
1360-
action: None
1375+
action: None,
1376+
empty_span: None,
13611377
}
13621378
);
13631379
assert_eq!(&src[b_span.start()..b_span.end()], "b");
@@ -1383,7 +1399,8 @@ mod test {
13831399
Production {
13841400
symbols: vec![],
13851401
precedence: None,
1386-
action: None
1402+
action: None,
1403+
empty_span: Some(Span::new(32, 32)),
13871404
}
13881405
);
13891406

@@ -1393,7 +1410,8 @@ mod test {
13931410
Production {
13941411
symbols: vec![token_span("b", b_span)],
13951412
precedence: None,
1396-
action: None
1413+
action: None,
1414+
empty_span: None,
13971415
}
13981416
);
13991417
assert_eq!(&src[b_span.start()..b_span.end()], "b");
@@ -1402,7 +1420,8 @@ mod test {
14021420
Production {
14031421
symbols: vec![],
14041422
precedence: None,
1405-
action: None
1423+
action: None,
1424+
empty_span: Some(Span::new(56, 56)),
14061425
}
14071426
);
14081427

@@ -1411,7 +1430,8 @@ mod test {
14111430
Production {
14121431
symbols: vec![],
14131432
precedence: None,
1414-
action: None
1433+
action: None,
1434+
empty_span: Some(Span::new(74, 74)),
14151435
}
14161436
);
14171437
let c_span = Span::new(77, 78);
@@ -1420,7 +1440,8 @@ mod test {
14201440
Production {
14211441
symbols: vec![token_span("c", c_span)],
14221442
precedence: None,
1423-
action: None
1443+
action: None,
1444+
empty_span: None,
14241445
}
14251446
);
14261447
assert_eq!(&src[c_span.start()..c_span.end()], "c");
@@ -1451,7 +1472,8 @@ mod test {
14511472
Production {
14521473
symbols: vec![token_span("a", a_span), rule_span("B", b_span)],
14531474
precedence: None,
1454-
action: None
1475+
action: None,
1476+
empty_span: None,
14551477
}
14561478
);
14571479
assert_eq!(&src[a_span.start()..a_span.end()], "a");
@@ -1473,7 +1495,8 @@ mod test {
14731495
Production {
14741496
symbols: vec![token_span("a", a_span), token_span("b", b_span)],
14751497
precedence: None,
1476-
action: None
1498+
action: None,
1499+
empty_span: None,
14771500
}
14781501
);
14791502
assert_eq!(&src[a_span.start()..a_span.end()], "a");
@@ -1552,7 +1575,8 @@ mod test {
15521575
Production {
15531576
symbols: vec![token_span("T", t_span)],
15541577
precedence: None,
1555-
action: None
1578+
action: None,
1579+
empty_span: None,
15561580
}
15571581
);
15581582
assert_eq!(&src[t_span.start()..t_span.end() + 1], "T;");
@@ -2805,4 +2829,56 @@ B";
28052829
.unwrap();
28062830
}
28072831
}
2832+
2833+
/// Regression test for issue 473: every empty production must carry an
/// `empty_span`.
///
/// Each case is `(grammar source, (index into Expr's pidxs, expected span))`.
/// When the production is written with an explicit `%empty`, the expected span
/// starts at `%empty`; as the first case shows, it also takes in the
/// whitespace that follows the keyword. When the production is written as
/// nothing at all, the expected span is zero-width and sits at the `|` or `;`
/// that terminates the production.
///
/// The sources use `\n` escapes rather than multi-line literals so that the
/// byte offsets asserted below cannot be shifted by reindenting this function.
#[test]
fn test_empty_production_spans_issue_473() {
    let empty_prod_conflicts = [
        (
            "%start Expr\n%%\nExpr: %empty | Factor;\nFactor: ')' Expr ')';\n",
            (0, Some(Span::new(21, 28))),
        ),
        (
            "%start Expr\n%%\nExpr: | Factor;\nFactor: ')' Expr ')';\n",
            (0, Some(Span::new(21, 21))),
        ),
        (
            "%start Expr\n%%\nExpr: Factor | %empty;\nFactor: ')' Expr ')';\n",
            (1, Some(Span::new(30, 36))),
        ),
        (
            "%start Expr\n%%\nExpr: Factor | ;\nFactor: ')' Expr ')';\n",
            (1, Some(Span::new(30, 30))),
        ),
    ];

    for (case, (src, (empty_pidx, empty_span))) in empty_prod_conflicts.iter().enumerate() {
        let ast = parse(YaccKind::Original(YaccOriginalActionKind::NoAction), src).unwrap();
        let pidx = ast.get_rule("Expr").unwrap().pidxs[*empty_pidx];
        // Identify the failing case in the assertion message itself instead of
        // printing the index unconditionally to stderr on every iteration.
        assert_eq!(
            ast.prods[pidx],
            Production {
                symbols: vec![],
                precedence: None,
                action: None,
                empty_span: *empty_span,
            },
            "case {}: wrong empty production for grammar {:?}",
            case,
            src
        );
    }
}
28082884
}

0 commit comments

Comments
 (0)