Skip to content

Commit f91d922

Browse files
authored
Initial generation of IR from nu AST. (#40)
The initial implementation only handles simple math expressions; the focus is primarily on introducing new structs, agreeing on naming, and adding the output to insta tests.
1 parent 823083e commit f91d922

36 files changed

+361
-1
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ edition = "2021"
88
[dependencies]
99
tracy-client = { version = "0.17.3", default-features = false } # for tracy v0.11.1
1010
logos = "0.15"
11+
nu-protocol = "0.101"
1112

1213
[profile.profiling]
1314
inherits = "release"
@@ -30,7 +31,6 @@ path = "src/lib.rs"
3031
insta = { version = "1.33.0", features = ["glob"] }
3132
tango-bench = "0.6"
3233
nu-parser = "0.101"
33-
nu-protocol = "0.101"
3434
nu-cmd-lang = "0.101"
3535

3636
[[bench]]

src/compiler.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,4 +233,17 @@ impl Compiler {
233233
.get(span_start..span_end)
234234
.expect("internal error: missing source of span")
235235
}
236+
237+
/// Get the source contents of a node
238+
pub fn node_as_str(&self, node_id: NodeId) -> &str {
239+
std::str::from_utf8(self.get_span_contents(node_id))
240+
.expect("internal error: expected utf8 string")
241+
}
242+
243+
/// Get the source contents of a node as i64
244+
pub fn node_as_i64(&self, node_id: NodeId) -> i64 {
245+
self.node_as_str(node_id)
246+
.parse::<i64>()
247+
.expect("internal error: expected i64")
248+
}
236249
}

src/ir_generator.rs

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
use crate::compiler::Compiler;
2+
use crate::errors::{Severity, SourceError};
3+
use crate::parser::{AstNode, NodeId};
4+
use nu_protocol::ast::{Math, Operator};
5+
use nu_protocol::ir::{Instruction, IrBlock, Literal};
6+
use nu_protocol::{RegId, Span};
7+
8+
/// Generates IR (Intermediate Representation) from nu AST.
9+
pub struct IrGenerator<'a> {
10+
// Immutable reference to a compiler after the typechecker pass
11+
compiler: &'a Compiler,
12+
errors: Vec<SourceError>,
13+
block: IrBlock,
14+
}
15+
16+
impl<'a> IrGenerator<'a> {
17+
pub fn new(compiler: &'a Compiler) -> Self {
18+
Self {
19+
compiler,
20+
errors: Default::default(),
21+
block: IrBlock {
22+
instructions: Default::default(),
23+
spans: Default::default(),
24+
data: Default::default(),
25+
ast: Default::default(),
26+
comments: Default::default(),
27+
register_count: 0,
28+
file_count: 0,
29+
},
30+
}
31+
}
32+
33+
/// Generates the IR from the given state of the compiler.
34+
/// After this is called, use `block` and `errors` to get the result.
35+
pub fn generate(&mut self) {
36+
if self.compiler.ast_nodes.is_empty() {
37+
return;
38+
}
39+
let node_id = NodeId(self.compiler.ast_nodes.len() - 1);
40+
let Some(reg) = self.generate_node(node_id) else {
41+
return;
42+
};
43+
self.add_instruction(node_id, Instruction::Return { src: reg });
44+
}
45+
46+
/// Returns generated IR block.
47+
///
48+
/// Call `generate` before using this method and ensure there are no errors.
49+
pub fn block(self) -> IrBlock {
50+
self.block
51+
}
52+
53+
/// Returns errors encountered during IR generation step.
54+
///
55+
/// Call `generate` before using this method.
56+
pub fn errors(&self) -> &Vec<SourceError> {
57+
&self.errors
58+
}
59+
60+
/// Prints the internal state to standard output.
61+
pub fn print(&self) {
62+
let output = self.display_state();
63+
print!("{output}");
64+
}
65+
66+
/// Displays the state of the IR generator.
67+
/// The output can be used for human debugging and for snapshot tests.
68+
pub fn display_state(&self) -> String {
69+
let mut result = String::new();
70+
result.push_str("==== IR ====\n");
71+
result.push_str(&format!("register_count: {}\n", self.block.register_count));
72+
result.push_str(&format!("file_count: {}\n", self.block.file_count));
73+
74+
for (idx, instruction) in self.block.instructions.iter().enumerate() {
75+
result.push_str(&format!("{}: {:?}\n", idx, instruction));
76+
}
77+
78+
if !self.errors.is_empty() {
79+
result.push_str("==== IR ERRORS ====\n");
80+
for error in &self.errors {
81+
result.push_str(&format!(
82+
"{:?} (NodeId {}): {}\n",
83+
error.severity, error.node_id.0, error.message
84+
));
85+
}
86+
}
87+
result
88+
}
89+
90+
// Returns unused register.
91+
fn next_register(&mut self) -> RegId {
92+
let r = RegId::new(self.block.register_count);
93+
self.block.register_count += 1;
94+
r
95+
}
96+
97+
fn generate_node(&mut self, node_id: NodeId) -> Option<RegId> {
98+
let ast_node = &self.compiler.ast_nodes[node_id.0];
99+
match ast_node {
100+
AstNode::Int => {
101+
let next_reg = self.next_register();
102+
let val = self.compiler.node_as_i64(node_id);
103+
self.add_instruction(
104+
node_id,
105+
Instruction::LoadLiteral {
106+
dst: next_reg,
107+
lit: Literal::Int(val),
108+
},
109+
);
110+
Some(next_reg)
111+
}
112+
AstNode::Block(block_id) => {
113+
let block = &self.compiler.blocks[block_id.0];
114+
let mut last = None;
115+
for id in &block.nodes {
116+
last = self.generate_node(*id);
117+
last?;
118+
}
119+
last
120+
}
121+
AstNode::BinaryOp { lhs, op, rhs } => {
122+
let l = self.generate_node(*lhs)?;
123+
let r = self.generate_node(*rhs)?;
124+
let o = self.node_to_operator(*op)?;
125+
self.add_instruction(
126+
node_id,
127+
Instruction::BinaryOp {
128+
lhs_dst: l,
129+
op: o,
130+
rhs: r,
131+
},
132+
);
133+
Some(l)
134+
}
135+
_ => {
136+
self.error(format!("node {:?} not suported yet", ast_node), node_id);
137+
None
138+
}
139+
}
140+
}
141+
142+
/// Appends `instruction` to the block, tagged with the source span of `node_id`.
///
/// One entry is pushed to each of `instructions`, `spans` and `ast` per call; the
/// `ast` entry is always `None`.
fn add_instruction(&mut self, node_id: NodeId, instruction: Instruction) {
    let source_span = self.compiler.get_span(node_id);
    let (start, end) = (source_span.start, source_span.end);

    let block = &mut self.block;
    block.instructions.push(instruction);
    block.ast.push(None);
    block.spans.push(Span { start, end });
}
151+
152+
fn node_to_operator(&mut self, node_id: NodeId) -> Option<Operator> {
153+
match self.compiler.get_node(node_id) {
154+
AstNode::Plus => Some(Operator::Math(Math::Plus)),
155+
AstNode::Multiply => Some(Operator::Math(Math::Multiply)),
156+
node => {
157+
self.error(format!("unrecognized operator {:?}", node), node_id);
158+
None
159+
}
160+
}
161+
}
162+
163+
fn error(&mut self, message: impl Into<String>, node_id: NodeId) {
164+
self.errors.push(SourceError {
165+
message: message.into(),
166+
node_id,
167+
severity: Severity::Error,
168+
});
169+
}
170+
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
pub mod compiler;
22
pub mod errors;
3+
pub mod ir_generator;
34
pub mod lexer;
45
pub mod parser;
56
pub mod protocol;

src/main.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::process::exit;
22

33
use new_nu_parser::compiler::Compiler;
4+
use new_nu_parser::ir_generator::IrGenerator;
45
use new_nu_parser::lexer::lex;
56
use new_nu_parser::parser::Parser;
67
use new_nu_parser::resolver::Resolver;
@@ -80,6 +81,12 @@ fn main() {
8081
typechecker.print();
8182
}
8283

84+
let mut ir_generator = IrGenerator::new(&compiler);
85+
ir_generator.generate();
86+
if do_print {
87+
ir_generator.print();
88+
}
89+
8390
compiler.merge_types(typechecker.to_types());
8491
}
8592
}

src/snapshots/[email protected]

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,8 @@ snapshot_kind: text
2323
4: string
2424
5: stream<binary>
2525
6: stream<binary>
26+
==== IR ====
27+
register_count: 0
28+
file_count: 0
29+
==== IR ERRORS ====
30+
Error (NodeId 2): node Alias { new_name: NodeId(0), old_name: NodeId(1) } not suported yet

src/snapshots/new_nu_parser__test__node_output@binary_ops_exact.nu.snap

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
source: src/test.rs
33
expression: evaluate_example(path)
44
input_file: tests/binary_ops_exact.nu
5+
snapshot_kind: text
56
---
67
==== COMPILER ====
78
0: Int (0 to 1) "1"
@@ -71,3 +72,10 @@ input_file: tests/binary_ops_exact.nu
7172
29: list<int>
7273
30: bool
7374
31: bool
75+
==== IR ====
76+
register_count: 2
77+
file_count: 0
78+
0: LoadLiteral { dst: RegId(0), lit: Int(1) }
79+
1: LoadLiteral { dst: RegId(1), lit: Int(1) }
80+
==== IR ERRORS ====
81+
Error (NodeId 1): unrecognized operator Equal

src/snapshots/new_nu_parser__test__node_output@binary_ops_mismatch.nu.snap

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,8 @@ Error (NodeId 1): type mismatch: unsupported addition between string and float
4646
Error (NodeId 5): type mismatch: unsupported append between string and float
4747
Error (NodeId 9): type mismatch: unsupported logical operation between bool and string
4848
Error (NodeId 13): type mismatch: unsupported string operation between bool and string
49+
==== IR ====
50+
register_count: 0
51+
file_count: 0
52+
==== IR ERRORS ====
53+
Error (NodeId 0): node String not suported yet

src/snapshots/new_nu_parser__test__node_output@binary_ops_subtypes.nu.snap

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
source: src/test.rs
33
expression: evaluate_example(path)
44
input_file: tests/binary_ops_subtypes.nu
5+
snapshot_kind: text
56
---
67
==== COMPILER ====
78
0: Int (0 to 1) "1"
@@ -115,3 +116,9 @@ input_file: tests/binary_ops_subtypes.nu
115116
51: int
116117
52: float
117118
53: float
119+
==== IR ====
120+
register_count: 1
121+
file_count: 0
122+
0: LoadLiteral { dst: RegId(0), lit: Int(1) }
123+
==== IR ERRORS ====
124+
Error (NodeId 2): node Float not suported yet

src/snapshots/[email protected]

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,8 @@ snapshot_kind: text
9090
36: unknown
9191
37: stream<binary>
9292
38: stream<binary>
93+
==== IR ====
94+
register_count: 0
95+
file_count: 0
96+
==== IR ERRORS ====
97+
Error (NodeId 7): node Call { parts: [NodeId(0), NodeId(1), NodeId(2), NodeId(6)] } not suported yet

0 commit comments

Comments
 (0)