Skip to content

Commit 6899325

Browse files
authored
Lexer and parser refactor (#31)
The on-demand lexer is replaced with a standalone lexing pass using the Logos crate. The lexing code is refactored away from the parser into a separate data structure. The Parser is refactored and simplified accordingly. The new lexer correctly recognizes more syntactic shapes than the previous one. The new lexer+parser seems slightly faster (5-6 ms together on the combined1000.nu benchmark) than the previous version, but the main benefit is the lexer disentangled from the parser code. It is now possible to change the lexer more easily, including turning it into an on-demand lexer again if need be. The main shortcomings are: * Difficulty lexing string interpolation, e.g., `$"foo(1 + 2)bar"`. We'd need to switch to another lexer. It might be easier to use on-demand lexing for it. * Unmatched delimiters. If the file ends with an unmatched delimiter, an extremely unhelpful error is emitted. This should be solvable by using [callbacks](https://logos.maciej.codes/callbacks.html). Notable API changes: * `self.peek()` -> `self.tokens.peek()` * `self.next()` -> gone; use `self.tokens.advance()` to point at the next token, followed by `self.tokens.peek()`.
1 parent eba4f27 commit 6899325

27 files changed

+1476
-1577
lines changed

Cargo.lock

Lines changed: 56 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ edition = "2021"
77

88
[dependencies]
99
tracy-client = { version = "0.17.3", default-features = false } # for tracy v0.11.1
10+
logos = "0.15"
1011

1112
[profile.profiling]
1213
inherits = "release"

benches/benchmarks.rs

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::process::exit;
22

3+
use new_nu_parser::lexer::{lex, Tokens};
34
use nu_protocol::engine::{EngineState, StateWorkingSet};
45
use tango_bench::{benchmark_fn, tango_benchmarks, tango_main, Benchmark, IntoBenchmarks};
56

@@ -16,9 +17,11 @@ const BENCHMARKS: &[&str] = &[
1617
"combined10",
1718
"combined100",
1819
"combined1000",
20+
"int100",
1921
];
2022

2123
enum Stage {
24+
Lex,
2225
Parse,
2326
Resolve,
2427
Typecheck,
@@ -30,6 +33,7 @@ enum Stage {
3033

3134
/// Stages of compilation we want to profile
3235
const STAGES: &[Stage] = &[
36+
Stage::Lex,
3337
Stage::Parse,
3438
Stage::Resolve,
3539
Stage::Typecheck,
@@ -52,8 +56,15 @@ fn setup_compiler(
5256
let contents = std::fs::read(fname).map_err(|_| format!("Cannot find file {fname}"))?;
5357
compiler.add_file(&fname, &contents);
5458

59+
let (tokens, err) = lex(&contents, span_offset);
60+
if let Err(e) = err {
61+
tokens.eprint(&compiler.source);
62+
eprintln!("Lexing error. Error: {:?}", e);
63+
exit(1);
64+
}
65+
5566
if do_parse {
56-
let parser = Parser::new(compiler, span_offset);
67+
let parser = Parser::new(compiler, tokens);
5768
compiler = parser.parse();
5869

5970
if !compiler.errors.is_empty() {
@@ -87,8 +98,8 @@ fn setup_compiler(
8798
}
8899

89100
/// Parse only
90-
pub fn parse(mut compiler: Compiler, span_offset: usize) {
91-
let parser = Parser::new(compiler, span_offset);
101+
pub fn parse(mut compiler: Compiler, tokens: Tokens) {
102+
let parser = Parser::new(compiler, tokens);
92103
compiler = parser.parse();
93104

94105
if !compiler.errors.is_empty() {
@@ -129,7 +140,14 @@ pub fn typecheck(mut compiler: Compiler, do_merge: bool) {
129140

130141
/// Run all compiler stages
131142
pub fn compile(mut compiler: Compiler, span_offset: usize) {
132-
let parser = Parser::new(compiler, span_offset);
143+
let (tokens, err) = lex(&compiler.source, span_offset);
144+
if let Err(e) = err {
145+
tokens.eprint(&compiler.source);
146+
eprintln!("Lexing error. Error: {:?}", e);
147+
exit(1);
148+
}
149+
150+
let parser = Parser::new(compiler, tokens);
133151
compiler = parser.parse();
134152

135153
if !compiler.errors.is_empty() {
@@ -176,13 +194,36 @@ fn compiler_benchmarks() -> impl IntoBenchmarks {
176194
let bench_file = format!("benches/nu/{bench_name}.nu");
177195

178196
let bench = match stage {
197+
Stage::Lex => {
198+
let name = format!("{bench_name}_lex");
199+
benchmark_fn(name, move |b| {
200+
let contents = std::fs::read(&bench_file)
201+
.expect(&format!("Cannot find file {bench_file}"));
202+
b.iter(move || {
203+
let (tokens, err) = lex(&contents, 0);
204+
if let Err(e) = err {
205+
tokens.eprint(&contents);
206+
eprintln!("Lexing error. Error: {:?}", e);
207+
exit(1);
208+
}
209+
})
210+
})
211+
}
179212
Stage::Parse => {
180213
let name = format!("{bench_name}_parse");
181214
benchmark_fn(name, move |b| {
182215
let (compiler_def_init, span_offset) =
183216
setup_compiler(&bench_file, false, false, false)
184217
.expect("Error setting up compiler");
185-
b.iter(move || parse(compiler_def_init.clone(), span_offset))
218+
let contents = std::fs::read(&bench_file)
219+
.expect(&format!("Cannot find file {bench_file}"));
220+
let (tokens, err) = lex(&contents, span_offset);
221+
if let Err(e) = err {
222+
tokens.eprint(&contents);
223+
eprintln!("Lexing error. Error: {:?}", e);
224+
exit(1);
225+
}
226+
b.iter(move || parse(compiler_def_init.clone(), tokens.clone()))
186227
})
187228
}
188229
Stage::Resolve => {

0 commit comments

Comments (0)