Skip to content

Commit cc423a0

Browse files
committed
feat(argus-parser): complete lexer
1 parent 49408a3 commit cc423a0

File tree

1 file changed

+162
-3
lines changed

1 file changed

+162
-3
lines changed

argus-parser/src/lexer.rs

Lines changed: 162 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
1-
use std::collections::HashMap;
21
use std::{env, fmt, fs};
32

43
use ariadne::{sources, Color, Label, Report, ReportKind};
54
use chumsky::prelude::*;
65

76
pub type Span = SimpleSpan<usize>;
87

9-
#[derive(Clone, Debug, PartialEq)]
8+
#[derive(Clone, Debug, PartialEq, Eq)]
109
pub enum Token<'src> {
1110
Semicolon,
1211
LBracket,
@@ -32,10 +31,170 @@ pub enum Token<'src> {
3231
And,
3332
Or,
3433
Implies,
35-
Xor,
3634
Equiv,
35+
Xor,
3736
Next,
3837
Always,
3938
Eventually,
4039
Until,
4140
}
41+
42+
impl<'src> fmt::Display for Token<'src> {
43+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
44+
match self {
45+
Token::Semicolon => write!(f, ";"),
46+
Token::LBracket => write!(f, "["),
47+
Token::RBracket => write!(f, "]"),
48+
Token::LParen => write!(f, "("),
49+
Token::RParen => write!(f, ")"),
50+
Token::Comma => write!(f, ","),
51+
Token::Bool(val) => write!(f, "{}", val),
52+
Token::Num(val) => write!(f, "{}", val),
53+
Token::Ident(ident) => write!(f, "{}", ident),
54+
Token::Minus => write!(f, "-"),
55+
Token::Plus => write!(f, "+"),
56+
Token::Times => write!(f, "*"),
57+
Token::Divide => write!(f, "/"),
58+
Token::Lt => write!(f, "<"),
59+
Token::Le => write!(f, "<="),
60+
Token::Gt => write!(f, ">"),
61+
Token::Ge => write!(f, ">="),
62+
Token::Eq => write!(f, "=="),
63+
Token::Neq => write!(f, "!="),
64+
Token::Assign => write!(f, "="),
65+
Token::Not => write!(f, "!"),
66+
Token::And => write!(f, "&"),
67+
Token::Or => write!(f, "|"),
68+
Token::Implies => write!(f, "->"),
69+
Token::Equiv => write!(f, "<->"),
70+
Token::Xor => write!(f, "^"),
71+
Token::Next => write!(f, "X"),
72+
Token::Always => write!(f, "G"),
73+
Token::Eventually => write!(f, "F"),
74+
Token::Until => write!(f, "U"),
75+
}
76+
}
77+
}
78+
79+
pub fn lexer<'src>() -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err<Rich<'src, char, Span>>> {
80+
// A parser for numbers
81+
let digits = text::digits(10).slice();
82+
83+
let frac = just('.').then(digits);
84+
85+
let exp = just('e').or(just('E')).then(one_of("+-").or_not()).then(digits);
86+
87+
let number = just('-')
88+
.or_not()
89+
.then(text::int(10))
90+
.then(frac.or_not())
91+
.then(exp.or_not())
92+
.map_slice(Token::Num)
93+
.boxed();
94+
95+
// A parser for control characters (delimiters, semicolons, etc.)
96+
let ctrl = choice((
97+
just(";").to(Token::Semicolon),
98+
just("[").to(Token::LBracket),
99+
just("]").to(Token::RBracket),
100+
just("(").to(Token::LParen),
101+
just(")").to(Token::RParen),
102+
just(",").to(Token::Comma),
103+
));
104+
105+
// Lexer for operator symbols
106+
let op = choice((
107+
just("<->").to(Token::Equiv),
108+
just("<=>").to(Token::Equiv),
109+
just("<=").to(Token::Le),
110+
just("<").to(Token::Lt),
111+
just(">=").to(Token::Ge),
112+
just(">").to(Token::Gt),
113+
just("!=").to(Token::Neq),
114+
just("==").to(Token::Eq),
115+
just("->").to(Token::Implies),
116+
just("=>").to(Token::Implies),
117+
just("!").to(Token::Not),
118+
just("~").to(Token::Not),
119+
just("\u{00ac}").to(Token::Not), // ¬
120+
just("&&").to(Token::And),
121+
just("&").to(Token::And),
122+
just("\u{2227}").to(Token::And), // ∧
123+
just("||").to(Token::And),
124+
just("|").to(Token::And),
125+
just("\u{2228}").to(Token::Or), // ∨
126+
just("^").to(Token::Xor),
127+
just("-").to(Token::Minus),
128+
just("+").to(Token::Plus),
129+
just("*").to(Token::Times),
130+
just("/").to(Token::Divide),
131+
just("=").to(Token::Assign),
132+
));
133+
134+
// A parser for strings
135+
// Strings in our grammar are identifiers too
136+
let quoted_ident = just('"')
137+
.ignore_then(none_of('"').repeated())
138+
.then_ignore(just('"'))
139+
.map_slice(Token::Ident);
140+
141+
// A parser for identifiers and keywords
142+
let ident = text::ident().map(|ident: &str| match ident {
143+
"true" => Token::Bool(true),
144+
"false" => Token::Bool(false),
145+
"G" => Token::Always,
146+
"alw" => Token::Always,
147+
"F" => Token::Eventually,
148+
"ev" => Token::Eventually,
149+
"X" => Token::Next,
150+
"U" => Token::Until,
151+
_ => Token::Ident(ident),
152+
});
153+
154+
// A single token can be one of the above
155+
let token = choice((op, ctrl, quoted_ident, ident, number));
156+
157+
let comment = just("//").then(any().and_is(just('\n').not()).repeated()).padded();
158+
159+
token
160+
.map_with_span(|tok, span| (tok, span))
161+
.padded_by(comment.repeated())
162+
.padded()
163+
// If we encounter an error, skip and attempt to lex the next character as a token instead
164+
.recover_with(skip_then_retry_until(any().ignored(), end()))
165+
.repeated()
166+
.collect()
167+
}
168+
169+
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: lexes a handful of inputs and checks the exact token
    /// streams, including spans.
    ///
    /// (The original defined unused `Output`/`MyErr` type aliases; they were
    /// dead code triggering compiler warnings and have been removed.)
    #[test]
    fn simple_test() {
        use Token::*;

        let cases = [
            ("true", vec![(Bool(true), Span::new(0, 4))]),
            ("false", vec![(Bool(false), Span::new(0, 5))]),
            (
                "F a",
                vec![(Eventually, Span::new(0, 1)), (Ident("a"), Span::new(2, 3))],
            ),
            (
                "a U b",
                vec![
                    (Ident("a"), Span::new(0, 1)),
                    (Until, Span::new(2, 3)),
                    (Ident("b"), Span::new(4, 5)),
                ],
            ),
        ];

        for (input, expected) in cases {
            let actual = lexer().parse(input).into_result().unwrap();
            // Include the input in the failure message for easier debugging.
            assert_eq!(actual, expected, "lexing {:?}", input);
        }
    }
}

0 commit comments

Comments
 (0)