Skip to content

Commit 1a4bcc5

Browse files
authored
rename interpreter tests and interpreter feature flag (#2662)
We've used "interpreter" and "VM" in the past to describe the two different interpreters. This is confusing because... the VM is an interpreter too, so it's hard to talk about generic interpretation. This PR uses a different nomenclature that doesn't have the "interpreter" baggage. - executor: any interpreter (thir-interpreter or vm bytecode interpreter) - thir-interpreter: the direct-style interpreter. What we used to call "interpreter" - vm: the vm bytecode interpreter Concrete changes: - the new "interpreter" test suite has been renamed to "executor" - the old "interpreter" feature flag is now called "thir-interpreter"
1 parent 1ddfb1c commit 1a4bcc5

File tree

20 files changed

+298
-19
lines changed

20 files changed

+298
-19
lines changed
Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
//! Executor evaluation tests.
2+
//!
3+
//! These tests evaluate BAML expressions and compare the results to expected outputs.
4+
//! Test files are in the `executor_tests/` directory and use the format:
5+
//!
6+
//! ```baml
7+
//! function Foo() -> int {
8+
//! 1 + 2
9+
//! }
10+
//!
11+
//! //> Foo()
12+
//! //
13+
//! // 3
14+
//! ```
15+
//!
16+
//! The comment line starting with `//>` contains the expression to evaluate,
17+
//! and the comment lines that follow it contain the expected pretty-printed result.
18+
19+
use std::path::PathBuf;
20+
21+
use anyhow::{Context, Result};
22+
use baml_types::BamlValue;
23+
24+
// Select the runtime under test at compile time, mirroring the CoreRuntime pattern:
// `TestRuntime` is `ThirInterpreterRuntime` when the `thir-interpreter` feature is
// enabled and `VmRuntime` otherwise. Both expose `eval_expr(source, expr)` with the
// same signature, so the rest of this file only refers to `TestRuntime`.
25+
#[cfg(feature = "thir-interpreter")]
26+
type TestRuntime = ThirInterpreterRuntime;
27+
#[cfg(not(feature = "thir-interpreter"))]
28+
type TestRuntime = VmRuntime;
29+
30+
/// A single evaluation case extracted from a `.baml` test file.
#[derive(Debug)]
struct InterpreterTest {
    /// BAML source text accumulated from the file's non-comment lines.
    source: String,
    /// Expression to evaluate, taken from a `//>` comment line.
    expr: String,
    /// Expected output, joined from the comment lines following the expression.
    expected: String,
    /// Path of the originating file, used when reporting failures.
    file_path: PathBuf,
}
42+
43+
/// Parse a test file into test cases
44+
fn parse_test_file(path: &PathBuf) -> Result<Vec<InterpreterTest>> {
45+
let source = std::fs::read_to_string(path)
46+
.with_context(|| format!("Failed to read test file: {}", path.display()))?;
47+
48+
let mut tests = Vec::new();
49+
let mut current_expr: Option<String> = None;
50+
let mut current_expected = Vec::new();
51+
let mut baml_source = String::new();
52+
let mut in_comment_block = false;
53+
54+
for line in source.lines() {
55+
let trimmed = line.trim();
56+
57+
if trimmed.starts_with("//>") {
58+
// Start of a new test expression
59+
if let Some(expr) = current_expr.take() {
60+
// Save previous test
61+
tests.push(InterpreterTest {
62+
source: baml_source.clone(),
63+
expr,
64+
expected: current_expected.join("\n"),
65+
file_path: path.clone(),
66+
});
67+
current_expected.clear();
68+
}
69+
70+
current_expr = Some(trimmed.trim_start_matches("//>").trim().to_string());
71+
in_comment_block = true;
72+
} else if in_comment_block && trimmed.starts_with("//") {
73+
// Continuation of expected output
74+
let content = trimmed.trim_start_matches("//").trim_start();
75+
current_expected.push(content.to_string());
76+
} else if in_comment_block && trimmed.is_empty() {
77+
// Empty line might be part of expected output
78+
current_expected.push(String::new());
79+
} else if !trimmed.starts_with("//") {
80+
// Not a comment, back to source code
81+
in_comment_block = false;
82+
baml_source.push_str(line);
83+
baml_source.push('\n');
84+
}
85+
}
86+
87+
// Don't forget the last test
88+
if let Some(expr) = current_expr {
89+
tests.push(InterpreterTest {
90+
source: baml_source.clone(),
91+
expr,
92+
expected: current_expected.join("\n"),
93+
file_path: path.clone(),
94+
});
95+
}
96+
97+
Ok(tests)
98+
}
99+
100+
/// Test result including value and watch notifications
101+
#[derive(Debug)]
102+
struct TestResult {
103+
value: BamlValue,
104+
watch_notifications: Vec<String>,
105+
}
106+
107+
/// Evaluation backend that runs test expressions through the THIR interpreter.
#[cfg(feature = "thir-interpreter")]
struct ThirInterpreterRuntime;

#[cfg(feature = "thir-interpreter")]
impl ThirInterpreterRuntime {
    /// Type-check `source`, then parse, type-check and interpret `expr` against it.
    ///
    /// Returns the resulting value together with every watch notification emitted
    /// during interpretation, each rendered as `[watch] <name> = <value>`.
    ///
    /// # Errors
    ///
    /// Fails if the source or the expression cannot be parsed or type-checked, if
    /// the Tokio runtime cannot be created, or if interpretation fails. Any LLM
    /// call attempted by the expression is rejected with an error.
    fn eval_expr(source: &str, expr: &str) -> Result<TestResult> {
        use std::sync::{Arc, Mutex};

        use baml_compiler::{
            thir::{
                interpret::interpret_thir,
                typecheck::{typecheck, typecheck_expression, TypeContext},
            },
            watch::{SharedWatchHandler, WatchNotification},
        };
        use internal_baml_ast::parse_standalone_expression;
        use internal_baml_diagnostics::{Diagnostics, SourceFile};

        // The compiler test helper needs a `'static` string, so the source text is
        // deliberately leaked for the remainder of the test process.
        let leaked_source: &'static str = Box::leak(source.to_string().into_boxed_str());
        let ast = baml_compiler::test::ast(leaked_source)?;
        let hir = baml_compiler::test::hir(&ast)?;
        let (thir, _diagnostics) = typecheck(&hir)?;

        // Parse and type-check the standalone expression against the program.
        // NOTE(review): `diagnostics` is not inspected after these calls, so
        // problems recorded only there are not surfaced by this function.
        let mut diagnostics = Diagnostics::new(SourceFile::new_static(expr));
        let hir_expr = parse_standalone_expression(expr, &mut diagnostics)?;
        let type_context = TypeContext::new(&hir);
        let thir_expr = typecheck_expression(&hir_expr, &type_context, &mut diagnostics);

        // Shared buffer that the watch handler appends rendered notifications to.
        let collected = Arc::new(Mutex::new(Vec::new()));
        let sink = Arc::clone(&collected);

        let watch_handler = SharedWatchHandler::new(move |notification: WatchNotification| {
            let mut lines = sink.lock().unwrap();
            let rendered_value = format!("{:#?}", notification.value);
            lines.push(format!(
                "[watch] {} = {}",
                notification.variable_name,
                rendered_value.trim()
            ));
        });

        // Drive the async interpreter to completion; LLM calls are rejected.
        let result = tokio::runtime::Runtime::new()?.block_on(async {
            interpret_thir(
                &thir,
                &thir_expr,
                |_name, _args, _ctx| {
                    Box::pin(async {
                        anyhow::bail!("LLM calls not supported in interpreter tests")
                    })
                },
                Some(watch_handler),
                None, // No function name
            )
            .await
        })?;

        // Snapshot the notifications gathered while interpreting.
        let watch_notifications = collected.lock().unwrap().clone();

        // Drop the metadata wrapper to obtain a plain value.
        let value = baml_types::baml_value_with_meta_to_baml_value(result);

        Ok(TestResult {
            value,
            watch_notifications,
        })
    }
}
183+
184+
/// Run interpreter tests using VM
185+
#[cfg(not(feature = "thir-interpreter"))]
186+
struct VmRuntime;
187+
188+
#[cfg(not(feature = "thir-interpreter"))]
189+
impl VmRuntime {
190+
fn eval_expr(source: &str, expr: &str) -> Result<TestResult> {
191+
use baml_vm::{BamlVmProgram, EvalStack};
192+
193+
// Compile to VM
194+
// Convert to owned string to satisfy 'static lifetime requirement
195+
let source_owned = source.to_string();
196+
let source_static: &'static str = Box::leak(source_owned.into_boxed_str());
197+
let ast = baml_compiler::test::ast(source_static)?;
198+
let program = baml_compiler::compile(&ast)?;
199+
200+
// For now, VM tests would need additional implementation
201+
// to evaluate arbitrary expressions
202+
anyhow::bail!("VM expression evaluation not yet implemented for tests")
203+
}
204+
}
205+
206+
/// Run a single interpreter test
207+
fn run_test(test: &InterpreterTest) -> Result<()> {
208+
let result = TestRuntime::eval_expr(&test.source, &test.expr)?;
209+
210+
// Build actual output: value + watch notifications
211+
let mut actual_lines = Vec::new();
212+
213+
// Add watch notifications first
214+
for notification in &result.watch_notifications {
215+
actual_lines.push(notification.clone());
216+
}
217+
218+
// Add separator if there are watch notifications
219+
if !result.watch_notifications.is_empty() {
220+
actual_lines.push(String::new());
221+
}
222+
223+
// Add the result value
224+
actual_lines.push(format!("{:#?}", result.value).trim().to_string());
225+
226+
let actual = actual_lines.join("\n");
227+
let expected = test.expected.trim();
228+
229+
if actual != expected {
230+
anyhow::bail!(
231+
"Test failed: {}\nExpression: {}\nExpected:\n{}\n\nActual:\n{}",
232+
test.file_path.display(),
233+
test.expr,
234+
expected,
235+
actual
236+
);
237+
}
238+
239+
Ok(())
240+
}
241+
242+
/// Discover every `.baml` file in the test directory, parse its cases, and run them.
///
/// Files are processed in sorted path order so that runs are reproducible across
/// platforms. The run stops at the first failing case.
///
/// NOTE(review): the directory looked up here is `tests/interpreter_tests`; confirm
/// that this is the directory that actually holds the test files, because a missing
/// directory is treated as "no tests" and the test passes.
#[test]
fn executor_tests() -> Result<()> {
    let test_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/interpreter_tests");

    if !test_dir.exists() {
        // No tests yet: skip, but say so, so that a wrong path is noticeable.
        eprintln!(
            "Skipping executor tests: directory {} does not exist",
            test_dir.display()
        );
        return Ok(());
    }

    // Collect the .baml files first; `read_dir` yields entries in a
    // platform-dependent order, so sort them before running anything.
    let mut test_files = Vec::new();
    let entries = std::fs::read_dir(&test_dir)
        .with_context(|| format!("Failed to list test directory: {}", test_dir.display()))?;
    for entry in entries {
        let path = entry?.path();
        if path.extension().and_then(|s| s.to_str()) == Some("baml") {
            test_files.push(path);
        }
    }
    test_files.sort();

    let mut all_tests = Vec::new();
    for path in &test_files {
        all_tests.extend(parse_test_file(path)?);
    }

    println!("Running {} interpreter tests", all_tests.len());

    // Run every case, attaching the file and expression to any failure.
    for test in &all_tests {
        run_test(test).with_context(|| {
            format!(
                "Test failed in {} for expression: {}",
                test.file_path.display(),
                test.expr
            )
        })?;
    }

    Ok(())
}

engine/baml-runtime/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ defaults = ["skip-integ-tests"]
201201
internal = []
202202
skip-integ-tests = []
203203
# Use the THIR interpreter runtime instead of the VM bytecode runtime
204-
interpreter = []
204+
thir-interpreter = []
205205

206206

207207
[dev-dependencies]

engine/baml-runtime/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ use std::{
3434
};
3535

3636
use anyhow::{Context, Result};
37-
#[cfg(feature = "interpreter")]
37+
#[cfg(feature = "thir-interpreter")]
3838
use async_interpreter_runtime::BamlAsyncInterpreterRuntime as CoreRuntime;
39-
#[cfg(not(feature = "interpreter"))]
39+
#[cfg(not(feature = "thir-interpreter"))]
4040
use async_vm_runtime::BamlAsyncVmRuntime as CoreRuntime;
4141
use baml_compiler::watch::SharedWatchHandler;
4242
use baml_ids::{FunctionCallId, HttpRequestId};

engine/baml-runtime/tests/interpreter.rs renamed to engine/baml-runtime/tests/executor.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Interpreter evaluation tests.
22
//!
33
//! These tests evaluate BAML expressions and compare the results to expected outputs.
4-
//! Test files are in the `interpreter_tests/` directory and use the format:
4+
//! Test files are in the `executor_tests/` directory and use the format:
55
//!
66
//! ```baml
77
//! function Foo() -> int {
@@ -20,9 +20,9 @@ use std::path::PathBuf;
2020

2121
use anyhow::{Context, Result};
2222
// Use the CoreRuntime pattern: select implementation based on feature flag
23-
#[cfg(feature = "interpreter")]
23+
#[cfg(feature = "thir-interpreter")]
2424
use baml_runtime::async_interpreter_runtime::BamlAsyncInterpreterRuntime as CoreRuntime;
25-
#[cfg(not(feature = "interpreter"))]
25+
#[cfg(not(feature = "thir-interpreter"))]
2626
use baml_runtime::async_vm_runtime::BamlAsyncVmRuntime as CoreRuntime;
2727
use baml_types::BamlValue;
2828

@@ -339,7 +339,7 @@ fn update_test_expectation(test: &InterpreterTest, new_output: &str) -> Result<(
339339

340340
/// Main test function that discovers and runs all interpreter tests
341341
#[tokio::test]
342-
async fn interpreter_tests() -> Result<()> {
342+
async fn executor_tests() -> Result<()> {
343343
let test_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/interpreter_tests");
344344

345345
if !test_dir.exists() {

0 commit comments

Comments
 (0)