Skip to content

Commit a4d10d4

Browse files
authored
feat(baml_syntax): Add Rowan-based syntax tree infrastructure for BAML (#2687)
Introduces a lossless, incremental syntax tree representation for BAML source code using the Rowan library (same as rust-analyzer). This provides the foundation for all language tooling including parsing, semantic analysis, and IDE features.

## What is a Syntax Tree?

A syntax tree represents the grammatical structure of source code. Unlike traditional ASTs, Rowan syntax trees are:

- Lossless: preserve all source text including whitespace and comments
- Incremental: support efficient updates when code changes
- Lazy: nodes are created on-demand during traversal
- Parent-aware: nodes can traverse up to their parents

## Key Components

**SyntaxKind**: Defines all possible elements in BAML code

- Tokens: WORD, INTEGER, L_BRACE, ARROW, etc.
- Nodes: FUNCTION_DEF, CLASS_DEF, TYPE_EXPR, etc.

**Typed AST Nodes**: Provide ergonomic access to tree structure

```rust
// Example from tests:
let function = source_file.items()
    .find_map(|item| match item {
        Item::Function(f) => Some(f),
        _ => None,
    })
    .unwrap();
assert_eq!(function.name().unwrap().text(), "GetUser");
```

**Tree Builder**: Enables programmatic tree construction for testing

```rust
let tree = SyntaxTreeBuilder::build_function(
    "GetUser",
    &[("id", "int"), ("name", "string")],
    "User"
);
// Produces: function GetUser(id: int, name: string) -> User { ... }
```

**Traversal Utilities**: Navigate and query syntax trees

- Find nodes by type at specific text positions
- Search for ancestors/descendants of specific kinds
- Filter out trivia (whitespace/comments) when needed

## Why This Matters

This infrastructure enables:

- Error-resilient parsing (partial trees even with syntax errors)
- Incremental re-parsing (only changed portions)
- Precise source locations for diagnostics
- Code formatting that preserves comments
- Refactoring tools that maintain code style
- IDE features like go-to-definition and hover

The lossless property means we can perfectly reconstruct the original source, essential for formatters and refactoring tools that need to preserve user intent.

<!-- CURSOR_SUMMARY -->
---

> [!NOTE]
> Adds a Rowan-backed, lossless syntax tree for BAML with typed AST nodes, builder and traversal utilities, expands SyntaxKind, and updates the parser/tests to use SOURCE_FILE as the root.
>
> - **baml_syntax**:
>   - **SyntaxKind**: Expand and reorganize token/node kinds; add helpers (`is_trivia`, `is_literal`, `is_operator`) and conversions.
>   - **Typed AST**: Add node wrappers (e.g., `SourceFile`, `FunctionDef`, `ClassDef`) and `Item` enum with accessors (names, params, fields).
>   - **Builder**: Introduce `SyntaxTreeBuilder` with helpers to build functions/classes for tests.
>   - **Traversal**: Add utilities for ancestor/descendant lookup, token iteration, error checks, and trimmed ranges.
>   - **Lib exports & tests**: Wire modules and add unit tests.
> - **baml_parser**:
>   - Wrap tokens under `SOURCE_FILE` in stub `parse_file`.
> - **Tests/Snapshots**:
>   - Update expected root node from `ROOT` to `SOURCE_FILE`.
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit d28d9bd. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
1 parent 4429f4b commit a4d10d4

12 files changed

+803
-103
lines changed

baml_language/crates/baml_parser/src/parser.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,12 +13,12 @@ use crate::ParseError;
1313
///
1414
/// Returns the green tree and any parse errors encountered.
1515
///
16-
/// **STUB**: Currently just wraps all tokens in a ROOT node.
16+
/// **STUB**: Currently just wraps all tokens in a `SOURCE_FILE` node.
1717
pub fn parse_file(tokens: &[Token]) -> (GreenNode, Vec<ParseError>) {
1818
let mut builder = GreenNodeBuilder::new();
1919
let errors = Vec::new();
2020

21-
builder.start_node(SyntaxKind::ROOT.into());
21+
builder.start_node(SyntaxKind::SOURCE_FILE.into());
2222

2323
// Stub: Just add all tokens as-is
2424
for token in tokens {
Lines changed: 220 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,220 @@
1+
//! Typed AST node wrappers for ergonomic tree access.
2+
3+
use crate::{SyntaxKind, SyntaxNode, SyntaxToken};
4+
use rowan::ast::AstNode;
5+
6+
/// Trait for all AST nodes.
7+
pub trait BamlAstNode: AstNode<Language = crate::BamlLanguage> {
8+
/// Get the syntax kind of this node.
9+
fn kind(&self) -> SyntaxKind {
10+
self.syntax().kind()
11+
}
12+
}
13+
14+
/// Generates a typed wrapper struct around a [`SyntaxNode`] of one specific kind.
///
/// `ast_node!(Name, KIND)` expands to:
/// - a `Name` struct holding the wrapped node,
/// - an empty [`BamlAstNode`] impl (the default `kind` method is used), and
/// - an [`AstNode`] impl whose `cast` succeeds only for nodes of kind
///   `SyntaxKind::KIND`.
macro_rules! ast_node {
    ($name:ident, $kind:ident) => {
        #[derive(Debug, Clone, PartialEq, Eq, Hash)]
        pub struct $name {
            syntax: SyntaxNode,
        }

        impl AstNode for $name {
            type Language = crate::BamlLanguage;

            fn can_cast(kind: <Self::Language as rowan::Language>::Kind) -> bool {
                kind == SyntaxKind::$kind.into()
            }

            fn cast(syntax: SyntaxNode) -> Option<Self> {
                // Wrap the node only when its kind matches; otherwise yield `None`.
                Self::can_cast(syntax.kind()).then_some(Self { syntax })
            }

            fn syntax(&self) -> &SyntaxNode {
                &self.syntax
            }
        }

        impl BamlAstNode for $name {}
    };
}
45+
46+
// Define all AST node types
47+
ast_node!(SourceFile, SOURCE_FILE);
48+
ast_node!(FunctionDef, FUNCTION_DEF);
49+
ast_node!(ClassDef, CLASS_DEF);
50+
ast_node!(EnumDef, ENUM_DEF);
51+
ast_node!(ClientDef, CLIENT_DEF);
52+
ast_node!(TestDef, TEST_DEF);
53+
ast_node!(RetryPolicyDef, RETRY_POLICY_DEF);
54+
ast_node!(TemplateStringDef, TEMPLATE_STRING_DEF);
55+
ast_node!(TypeAliasDef, TYPE_ALIAS_DEF);
56+
57+
ast_node!(ParameterList, PARAMETER_LIST);
58+
ast_node!(Parameter, PARAMETER);
59+
ast_node!(FunctionBody, FUNCTION_BODY);
60+
ast_node!(Field, FIELD);
61+
ast_node!(EnumVariant, ENUM_VARIANT);
62+
ast_node!(ConfigBlock, CONFIG_BLOCK);
63+
ast_node!(ConfigItem, CONFIG_ITEM);
64+
65+
ast_node!(TypeExpr, TYPE_EXPR);
66+
ast_node!(Attribute, ATTRIBUTE);
67+
ast_node!(BlockAttribute, BLOCK_ATTRIBUTE);
68+
69+
ast_node!(Expr, EXPR);
70+
ast_node!(LetStmt, LET_STMT);
71+
ast_node!(IfExpr, IF_EXPR);
72+
ast_node!(WhileStmt, WHILE_STMT);
73+
ast_node!(ForExpr, FOR_EXPR);
74+
ast_node!(BlockExpr, BLOCK_EXPR);
75+
76+
// Implement accessor methods
77+
impl SourceFile {
78+
/// Iterate over all top-level items in the file.
79+
pub fn items(&self) -> impl Iterator<Item = Item> {
80+
self.syntax.children().filter_map(Item::cast)
81+
}
82+
}
83+
84+
impl FunctionDef {
85+
/// Get the function name.
86+
pub fn name(&self) -> Option<SyntaxToken> {
87+
self.syntax
88+
.children_with_tokens()
89+
.filter_map(rowan::NodeOrToken::into_token)
90+
.filter(|token| {
91+
token.kind() == SyntaxKind::WORD && token.parent() == Some(self.syntax.clone())
92+
})
93+
.nth(1) // Skip the "function" keyword, get the second WORD
94+
}
95+
96+
/// Get the parameter list.
97+
pub fn param_list(&self) -> Option<ParameterList> {
98+
self.syntax.children().find_map(ParameterList::cast)
99+
}
100+
101+
/// Get the return type.
102+
pub fn return_type(&self) -> Option<TypeExpr> {
103+
self.syntax.children().find_map(TypeExpr::cast)
104+
}
105+
106+
/// Get the function body.
107+
pub fn body(&self) -> Option<FunctionBody> {
108+
self.syntax.children().find_map(FunctionBody::cast)
109+
}
110+
}
111+
112+
impl ParameterList {
113+
/// Get all parameters.
114+
pub fn params(&self) -> impl Iterator<Item = Parameter> {
115+
self.syntax.children().filter_map(Parameter::cast)
116+
}
117+
}
118+
119+
impl ClassDef {
120+
/// Get the class name.
121+
pub fn name(&self) -> Option<SyntaxToken> {
122+
self.syntax
123+
.children_with_tokens()
124+
.filter_map(rowan::NodeOrToken::into_token)
125+
.filter(|token| {
126+
token.kind() == SyntaxKind::WORD && token.parent() == Some(self.syntax.clone())
127+
})
128+
.nth(1) // Skip the "class" keyword, get the second WORD
129+
}
130+
131+
/// Get all fields.
132+
pub fn fields(&self) -> impl Iterator<Item = Field> {
133+
self.syntax.children().filter_map(Field::cast)
134+
}
135+
136+
/// Get block attributes (@@dynamic).
137+
pub fn block_attributes(&self) -> impl Iterator<Item = BlockAttribute> {
138+
self.syntax.children().filter_map(BlockAttribute::cast)
139+
}
140+
}
141+
142+
impl Field {
143+
/// Get the field name.
144+
pub fn name(&self) -> Option<SyntaxToken> {
145+
self.syntax
146+
.children_with_tokens()
147+
.filter_map(rowan::NodeOrToken::into_token)
148+
.find(|token| token.kind() == SyntaxKind::WORD)
149+
}
150+
151+
/// Get the field type.
152+
pub fn ty(&self) -> Option<TypeExpr> {
153+
self.syntax.children().find_map(TypeExpr::cast)
154+
}
155+
156+
/// Get field attributes (@alias, @description, etc.).
157+
pub fn attributes(&self) -> impl Iterator<Item = Attribute> {
158+
self.syntax.children().filter_map(Attribute::cast)
159+
}
160+
}
161+
162+
/// Enum for any top-level item.
163+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
164+
pub enum Item {
165+
Function(FunctionDef),
166+
Class(ClassDef),
167+
Enum(EnumDef),
168+
Client(ClientDef),
169+
Test(TestDef),
170+
RetryPolicy(RetryPolicyDef),
171+
TemplateString(TemplateStringDef),
172+
TypeAlias(TypeAliasDef),
173+
}
174+
175+
impl AstNode for Item {
176+
type Language = crate::BamlLanguage;
177+
178+
fn can_cast(kind: <Self::Language as rowan::Language>::Kind) -> bool {
179+
matches!(
180+
kind,
181+
SyntaxKind::FUNCTION_DEF
182+
| SyntaxKind::CLASS_DEF
183+
| SyntaxKind::ENUM_DEF
184+
| SyntaxKind::CLIENT_DEF
185+
| SyntaxKind::TEST_DEF
186+
| SyntaxKind::RETRY_POLICY_DEF
187+
| SyntaxKind::TEMPLATE_STRING_DEF
188+
| SyntaxKind::TYPE_ALIAS_DEF
189+
)
190+
}
191+
192+
fn cast(syntax: SyntaxNode) -> Option<Self> {
193+
match syntax.kind() {
194+
SyntaxKind::FUNCTION_DEF => Some(Item::Function(FunctionDef { syntax })),
195+
SyntaxKind::CLASS_DEF => Some(Item::Class(ClassDef { syntax })),
196+
SyntaxKind::ENUM_DEF => Some(Item::Enum(EnumDef { syntax })),
197+
SyntaxKind::CLIENT_DEF => Some(Item::Client(ClientDef { syntax })),
198+
SyntaxKind::TEST_DEF => Some(Item::Test(TestDef { syntax })),
199+
SyntaxKind::RETRY_POLICY_DEF => Some(Item::RetryPolicy(RetryPolicyDef { syntax })),
200+
SyntaxKind::TEMPLATE_STRING_DEF => {
201+
Some(Item::TemplateString(TemplateStringDef { syntax }))
202+
}
203+
SyntaxKind::TYPE_ALIAS_DEF => Some(Item::TypeAlias(TypeAliasDef { syntax })),
204+
_ => None,
205+
}
206+
}
207+
208+
fn syntax(&self) -> &SyntaxNode {
209+
match self {
210+
Item::Function(it) => it.syntax(),
211+
Item::Class(it) => it.syntax(),
212+
Item::Enum(it) => it.syntax(),
213+
Item::Client(it) => it.syntax(),
214+
Item::Test(it) => it.syntax(),
215+
Item::RetryPolicy(it) => it.syntax(),
216+
Item::TemplateString(it) => it.syntax(),
217+
Item::TypeAlias(it) => it.syntax(),
218+
}
219+
}
220+
}
Lines changed: 158 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,158 @@
1+
//! Utilities for building syntax trees programmatically.
2+
//! Primarily used for testing.
3+
4+
use crate::SyntaxKind;
5+
use rowan::{GreenNode, GreenNodeBuilder};
6+
7+
/// Builder for constructing syntax trees.
8+
pub struct SyntaxTreeBuilder {
9+
builder: GreenNodeBuilder<'static>,
10+
}
11+
12+
impl SyntaxTreeBuilder {
13+
/// Create a new tree builder.
14+
pub fn new() -> Self {
15+
Self {
16+
builder: GreenNodeBuilder::new(),
17+
}
18+
}
19+
20+
/// Start a new node of the given kind.
21+
pub fn start_node(&mut self, kind: SyntaxKind) {
22+
self.builder.start_node(kind.into());
23+
}
24+
25+
/// Finish the current node.
26+
pub fn finish_node(&mut self) {
27+
self.builder.finish_node();
28+
}
29+
30+
/// Add a token to the tree.
31+
pub fn token(&mut self, kind: SyntaxKind, text: &str) {
32+
self.builder.token(kind.into(), text);
33+
}
34+
35+
/// Add whitespace.
36+
pub fn ws(&mut self, text: &str) {
37+
self.token(SyntaxKind::WHITESPACE, text);
38+
}
39+
40+
/// Add a newline.
41+
pub fn nl(&mut self) {
42+
self.token(SyntaxKind::NEWLINE, "\n");
43+
}
44+
45+
/// Build and consume the builder, returning the green tree.
46+
pub fn finish(self) -> GreenNode {
47+
self.builder.finish()
48+
}
49+
50+
/// Build a simple function for testing.
51+
pub fn build_function(name: &str, params: &[(&str, &str)], ret_type: &str) -> GreenNode {
52+
let mut builder = Self::new();
53+
54+
builder.start_node(SyntaxKind::SOURCE_FILE);
55+
builder.start_node(SyntaxKind::FUNCTION_DEF);
56+
57+
// function keyword
58+
builder.token(SyntaxKind::WORD, "function");
59+
builder.ws(" ");
60+
61+
// function name
62+
builder.token(SyntaxKind::WORD, name);
63+
64+
// parameters
65+
builder.start_node(SyntaxKind::PARAMETER_LIST);
66+
builder.token(SyntaxKind::L_PAREN, "(");
67+
68+
for (i, (param_name, param_type)) in params.iter().enumerate() {
69+
if i > 0 {
70+
builder.token(SyntaxKind::COMMA, ",");
71+
builder.ws(" ");
72+
}
73+
74+
builder.start_node(SyntaxKind::PARAMETER);
75+
builder.token(SyntaxKind::WORD, param_name);
76+
builder.token(SyntaxKind::COLON, ":");
77+
builder.ws(" ");
78+
builder.start_node(SyntaxKind::TYPE_EXPR);
79+
builder.token(SyntaxKind::WORD, param_type);
80+
builder.finish_node(); // TYPE_EXPR
81+
builder.finish_node(); // PARAMETER
82+
}
83+
84+
builder.token(SyntaxKind::R_PAREN, ")");
85+
builder.finish_node(); // PARAMETER_LIST
86+
87+
// return type
88+
builder.ws(" ");
89+
builder.token(SyntaxKind::ARROW, "->");
90+
builder.ws(" ");
91+
builder.start_node(SyntaxKind::TYPE_EXPR);
92+
builder.token(SyntaxKind::WORD, ret_type);
93+
builder.finish_node(); // TYPE_EXPR
94+
95+
// body
96+
builder.ws(" ");
97+
builder.start_node(SyntaxKind::FUNCTION_BODY);
98+
builder.token(SyntaxKind::L_BRACE, "{");
99+
builder.nl();
100+
builder.ws(" ");
101+
builder.token(SyntaxKind::WORD, "client");
102+
builder.ws(" ");
103+
builder.token(SyntaxKind::WORD, "GPT4");
104+
builder.nl();
105+
builder.token(SyntaxKind::R_BRACE, "}");
106+
builder.finish_node(); // FUNCTION_BODY
107+
108+
builder.finish_node(); // FUNCTION_DEF
109+
builder.finish_node(); // SOURCE_FILE
110+
111+
builder.finish()
112+
}
113+
114+
/// Build a simple class for testing.
115+
pub fn build_class(name: &str, fields: &[(&str, &str)]) -> GreenNode {
116+
let mut builder = Self::new();
117+
118+
builder.start_node(SyntaxKind::SOURCE_FILE);
119+
builder.start_node(SyntaxKind::CLASS_DEF);
120+
121+
// class keyword
122+
builder.token(SyntaxKind::WORD, "class");
123+
builder.ws(" ");
124+
125+
// class name
126+
builder.token(SyntaxKind::WORD, name);
127+
builder.ws(" ");
128+
129+
// body
130+
builder.token(SyntaxKind::L_BRACE, "{");
131+
builder.nl();
132+
133+
// fields
134+
for (field_name, field_type) in fields {
135+
builder.ws(" ");
136+
builder.start_node(SyntaxKind::FIELD);
137+
builder.token(SyntaxKind::WORD, field_name);
138+
builder.ws(" ");
139+
builder.start_node(SyntaxKind::TYPE_EXPR);
140+
builder.token(SyntaxKind::WORD, field_type);
141+
builder.finish_node(); // TYPE_EXPR
142+
builder.finish_node(); // FIELD
143+
builder.nl();
144+
}
145+
146+
builder.token(SyntaxKind::R_BRACE, "}");
147+
builder.finish_node(); // CLASS_DEF
148+
builder.finish_node(); // SOURCE_FILE
149+
150+
builder.finish()
151+
}
152+
}
153+
154+
impl Default for SyntaxTreeBuilder {
155+
fn default() -> Self {
156+
Self::new()
157+
}
158+
}

0 commit comments

Comments
 (0)