Skip to content

Commit 8bb1448

Browse files
committed
feat: add the semantic version to TSLanguage, and expose an API for retrieving it
1 parent f022210 commit 8bb1448

File tree

24 files changed

+371
-77
lines changed

24 files changed

+371
-77
lines changed

cli/generate/src/lib.rs

Lines changed: 91 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,10 @@ use std::{
77
};
88

99
use anyhow::Result;
10-
use build_tables::build_tables;
11-
use grammars::InputGrammar;
12-
pub use node_types::VariableInfoError;
13-
use parse_grammar::parse_grammar;
14-
pub use parse_grammar::ParseGrammarError;
15-
use prepare_grammar::prepare_grammar;
16-
pub use prepare_grammar::PrepareGrammarError;
1710
use regex::{Regex, RegexBuilder};
18-
use render::render_c_code;
1911
use semver::Version;
12+
use serde::{Deserialize, Serialize};
13+
use thiserror::Error;
2014

2115
mod build_tables;
2216
mod dedup;
@@ -30,9 +24,15 @@ mod render;
3024
mod rules;
3125
mod tables;
3226

27+
use build_tables::build_tables;
3328
pub use build_tables::ParseTableBuilderError;
34-
use serde::Serialize;
35-
use thiserror::Error;
29+
use grammars::InputGrammar;
30+
pub use node_types::VariableInfoError;
31+
use parse_grammar::parse_grammar;
32+
pub use parse_grammar::ParseGrammarError;
33+
use prepare_grammar::prepare_grammar;
34+
pub use prepare_grammar::PrepareGrammarError;
35+
use render::render_c_code;
3636

3737
static JSON_COMMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
3838
RegexBuilder::new("^\\s*//.*")
@@ -67,6 +67,8 @@ pub enum GenerateError {
6767
VariableInfo(#[from] VariableInfoError),
6868
#[error(transparent)]
6969
BuildTables(#[from] ParseTableBuilderError),
70+
#[error(transparent)]
71+
ParseVersion(#[from] ParseVersionError),
7072
}
7173

7274
impl From<std::io::Error> for GenerateError {
@@ -95,6 +97,16 @@ impl From<std::io::Error> for LoadGrammarError {
9597
}
9698
}
9799

100+
#[derive(Debug, Error, Serialize)]
101+
pub enum ParseVersionError {
102+
#[error("{0}")]
103+
Version(String),
104+
#[error("{0}")]
105+
JSON(String),
106+
#[error("{0}")]
107+
IO(String),
108+
}
109+
98110
pub type JSResult<T> = Result<T, JSError>;
99111

100112
#[derive(Debug, Error, Serialize)]
@@ -178,11 +190,18 @@ pub fn generate_parser_in_directory(
178190
// Parse and preprocess the grammar.
179191
let input_grammar = parse_grammar(&grammar_json)?;
180192

193+
let semantic_version = read_grammar_version(&repo_path)?;
194+
181195
// Generate the parser and related files.
182196
let GeneratedParser {
183197
c_code,
184198
node_types_json,
185-
} = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?;
199+
} = generate_parser_for_grammar_with_opts(
200+
&input_grammar,
201+
abi_version,
202+
semantic_version.map(|v| (v.major as u8, v.minor as u8, v.patch as u8)),
203+
report_symbol_name,
204+
)?;
186205

187206
write_file(&src_path.join("parser.c"), c_code)?;
188207
write_file(&src_path.join("node-types.json"), node_types_json)?;
@@ -193,17 +212,25 @@ pub fn generate_parser_in_directory(
193212
Ok(())
194213
}
195214

196-
pub fn generate_parser_for_grammar(grammar_json: &str) -> GenerateResult<(String, String)> {
215+
pub fn generate_parser_for_grammar(
216+
grammar_json: &str,
217+
semantic_version: Option<(u8, u8, u8)>,
218+
) -> GenerateResult<(String, String)> {
197219
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
198220
let input_grammar = parse_grammar(&grammar_json)?;
199-
let parser =
200-
generate_parser_for_grammar_with_opts(&input_grammar, tree_sitter::LANGUAGE_VERSION, None)?;
221+
let parser = generate_parser_for_grammar_with_opts(
222+
&input_grammar,
223+
tree_sitter::LANGUAGE_VERSION,
224+
semantic_version,
225+
None,
226+
)?;
201227
Ok((input_grammar.name, parser.c_code))
202228
}
203229

204230
fn generate_parser_for_grammar_with_opts(
205231
input_grammar: &InputGrammar,
206232
abi_version: usize,
233+
semantic_version: Option<(u8, u8, u8)>,
207234
report_symbol_name: Option<&str>,
208235
) -> GenerateResult<GeneratedParser> {
209236
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
@@ -233,6 +260,7 @@ fn generate_parser_for_grammar_with_opts(
233260
lexical_grammar,
234261
simple_aliases,
235262
abi_version,
263+
semantic_version,
236264
supertype_symbol_map,
237265
);
238266
Ok(GeneratedParser {
@@ -241,6 +269,55 @@ fn generate_parser_for_grammar_with_opts(
241269
})
242270
}
243271

272+
/// This will read the `tree-sitter.json` config file and attempt to extract the version.
273+
///
274+
/// If the file is not found in the current directory or any of its parent directories, this will
275+
/// return `None` to maintain backwards compatibility. If the file is found but the version cannot
276+
/// be parsed as semver, this will return an error.
277+
fn read_grammar_version(repo_path: &Path) -> Result<Option<Version>, ParseVersionError> {
278+
#[derive(Deserialize)]
279+
struct TreeSitterJson {
280+
metadata: Metadata,
281+
}
282+
283+
#[derive(Deserialize)]
284+
struct Metadata {
285+
version: String,
286+
}
287+
288+
let filename = "tree-sitter.json";
289+
let mut path = repo_path.join(filename);
290+
291+
loop {
292+
let json = path
293+
.exists()
294+
.then(|| {
295+
let contents = fs::read_to_string(path.as_path()).map_err(|e| {
296+
ParseVersionError::IO(format!("Failed to read `{}` -- {e}", path.display()))
297+
})?;
298+
serde_json::from_str::<TreeSitterJson>(&contents).map_err(|e| {
299+
ParseVersionError::JSON(format!("Failed to parse `{}` -- {e}", path.display()))
300+
})
301+
})
302+
.transpose()?;
303+
if let Some(json) = json {
304+
return Version::parse(&json.metadata.version)
305+
.map_err(|e| {
306+
ParseVersionError::Version(format!(
307+
"Failed to parse `{}` version as semver -- {e}",
308+
path.display()
309+
))
310+
})
311+
.map(Some);
312+
}
313+
path.pop(); // filename
314+
if !path.pop() {
315+
return Ok(None);
316+
}
317+
path.push(filename);
318+
}
319+
}
320+
244321
pub fn load_grammar_file(
245322
grammar_path: &Path,
246323
js_runtime: Option<&str>,

cli/generate/src/render.rs

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ use std::{
55
mem::swap,
66
};
77

8+
use indoc::indoc;
9+
810
use super::{
911
build_tables::Tables,
1012
grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
@@ -83,16 +85,21 @@ struct Generator {
8385
field_names: Vec<String>,
8486
supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
8587
supertype_map: BTreeMap<String, Vec<ChildType>>,
86-
87-
#[allow(unused)]
8888
abi_version: usize,
89+
metadata: Option<Metadata>,
8990
}
9091

9192
/// Bookkeeping record for one large character set.
///
/// NOTE(review): the code that reads and writes these fields is outside this view; the field
/// descriptions below are inferred from the names and should be confirmed against the generator.
struct LargeCharacterSetInfo {
    /// Presumably the identifier of the constant associated with this set -- confirm.
    constant_name: String,
    /// Presumably records whether that constant is referenced -- confirm.
    is_used: bool,
}
9596

97+
/// Semantic version of the grammar, split into its three components.
///
/// When present on the generator, each field is written into the `.metadata` initializer of the
/// generated `TSLanguage` under the same field name.
struct Metadata {
    /// Written as `.major_version`.
    major_version: u8,
    /// Written as `.minor_version`.
    minor_version: u8,
    /// Written as `.patch_version`.
    patch_version: u8,
}
102+
96103
impl Generator {
97104
fn generate(mut self) -> String {
98105
self.init();
@@ -1539,7 +1546,7 @@ impl Generator {
15391546
indent!(self);
15401547
add_line!(self, "static const TSLanguage language = {{");
15411548
indent!(self);
1542-
add_line!(self, ".version = LANGUAGE_VERSION,");
1549+
add_line!(self, ".abi_version = LANGUAGE_VERSION,");
15431550

15441551
// Quantities
15451552
add_line!(self, ".symbol_count = SYMBOL_COUNT,");
@@ -1629,6 +1636,24 @@ impl Generator {
16291636
.max()
16301637
.unwrap()
16311638
);
1639+
1640+
let Some(metadata) = &self.metadata else {
1641+
panic!(
1642+
indoc! {"
1643+
Metadata is required to generate ABI version {}.
1644+
This means that your grammar doesn't have a tree-sitter.json config file with an appropriate version field in the metadata table.
1645+
"},
1646+
self.abi_version
1647+
);
1648+
};
1649+
1650+
add_line!(self, ".metadata = {{");
1651+
indent!(self);
1652+
add_line!(self, ".major_version = {},", metadata.major_version);
1653+
add_line!(self, ".minor_version = {},", metadata.minor_version);
1654+
add_line!(self, ".patch_version = {},", metadata.patch_version);
1655+
dedent!(self);
1656+
add_line!(self, "}},");
16321657
}
16331658

16341659
dedent!(self);
@@ -1914,6 +1939,7 @@ pub fn render_c_code(
19141939
lexical_grammar: LexicalGrammar,
19151940
default_aliases: AliasMap,
19161941
abi_version: usize,
1942+
semantic_version: Option<(u8, u8, u8)>,
19171943
supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
19181944
) -> String {
19191945
assert!(
@@ -1932,6 +1958,11 @@ pub fn render_c_code(
19321958
lexical_grammar,
19331959
default_aliases,
19341960
abi_version,
1961+
metadata: semantic_version.map(|(major_version, minor_version, patch_version)| Metadata {
1962+
major_version,
1963+
minor_version,
1964+
patch_version,
1965+
}),
19351966
supertype_symbol_map,
19361967
..Default::default()
19371968
}

cli/src/tests/corpus_test.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,8 @@ fn test_feature_corpus_files() {
359359
}
360360
let error_message_path = test_path.join("expected_error.txt");
361361
let grammar_json = tree_sitter_generate::load_grammar_file(&grammar_path, None).unwrap();
362-
let generate_result = tree_sitter_generate::generate_parser_for_grammar(&grammar_json);
362+
let generate_result =
363+
tree_sitter_generate::generate_parser_for_grammar(&grammar_json, Some((0, 0, 0)));
363364

364365
if error_message_path.exists() {
365366
if EXAMPLE_INCLUDE.is_some() || EXAMPLE_EXCLUDE.is_some() {

cli/src/tests/language_test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ fn test_supertypes() {
101101
let language = get_language("rust");
102102
let supertypes = language.supertypes();
103103

104-
if language.version() < 15 {
104+
if language.abi_version() < 15 {
105105
return;
106106
}
107107

cli/src/tests/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,17 @@ mod tree_test;
1818
#[cfg(feature = "wasm")]
1919
mod wasm_language_test;
2020

21+
use tree_sitter_generate::GenerateResult;
22+
2123
pub use crate::fuzz::{
2224
allocations,
2325
edits::{get_random_edit, invert_edit},
2426
random::Rand,
2527
ITERATION_COUNT,
2628
};
29+
30+
/// This is a simple wrapper around [`tree_sitter_generate::generate_parser_for_grammar`], because
31+
/// our tests do not need to pass in a version number, only the grammar JSON.
32+
fn generate_parser(grammar_json: &str) -> GenerateResult<(String, String)> {
33+
tree_sitter_generate::generate_parser_for_grammar(grammar_json, Some((0, 0, 0)))
34+
}

cli/src/tests/node_test.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
use tree_sitter::{Node, Parser, Point, Tree};
2-
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
2+
use tree_sitter_generate::load_grammar_file;
33

44
use super::{
55
get_random_edit,
66
helpers::fixtures::{fixtures_dir, get_language, get_test_language},
77
Rand,
88
};
9-
use crate::parse::perform_edit;
9+
use crate::{parse::perform_edit, tests::generate_parser};
1010

1111
const JSON_EXAMPLE: &str = r#"
1212
@@ -317,7 +317,7 @@ fn test_next_sibling_of_zero_width_node() {
317317
)
318318
.unwrap();
319319

320-
let (parser_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();
320+
let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();
321321

322322
let mut parser = Parser::new();
323323
let language = get_test_language(&parser_name, &parser_code, None);
@@ -563,8 +563,7 @@ fn test_node_named_child() {
563563

564564
#[test]
565565
fn test_node_named_child_with_aliases_and_extras() {
566-
let (parser_name, parser_code) =
567-
generate_parser_for_grammar(GRAMMAR_WITH_ALIASES_AND_EXTRAS).unwrap();
566+
let (parser_name, parser_code) = generate_parser(GRAMMAR_WITH_ALIASES_AND_EXTRAS).unwrap();
568567

569568
let mut parser = Parser::new();
570569
parser
@@ -871,7 +870,7 @@ fn test_node_sexp() {
871870

872871
#[test]
873872
fn test_node_field_names() {
874-
let (parser_name, parser_code) = generate_parser_for_grammar(
873+
let (parser_name, parser_code) = generate_parser(
875874
r#"
876875
{
877876
"name": "test_grammar_with_fields",
@@ -981,7 +980,7 @@ fn test_node_field_names() {
981980

982981
#[test]
983982
fn test_node_field_calls_in_language_without_fields() {
984-
let (parser_name, parser_code) = generate_parser_for_grammar(
983+
let (parser_name, parser_code) = generate_parser(
985984
r#"
986985
{
987986
"name": "test_grammar_with_no_fields",
@@ -1039,7 +1038,7 @@ fn test_node_is_named_but_aliased_as_anonymous() {
10391038
)
10401039
.unwrap();
10411040

1042-
let (parser_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();
1041+
let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();
10431042

10441043
let mut parser = Parser::new();
10451044
let language = get_test_language(&parser_name, &parser_code, None);

cli/src/tests/parser_hang_test.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@ use std::{
77
};
88

99
use tree_sitter::Parser;
10-
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
10+
use tree_sitter_generate::load_grammar_file;
1111

12+
use super::generate_parser;
1213
use crate::tests::helpers::fixtures::{fixtures_dir, get_test_language};
1314

1415
// The `sanitizing` cfg is required to avoid running tests under specific sanitizers
@@ -90,7 +91,7 @@ fn hang_test() {
9091
.join("get_col_should_hang_not_crash");
9192

9293
let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
93-
let (parser_name, parser_code) = generate_parser_for_grammar(grammar_json.as_str()).unwrap();
94+
let (parser_name, parser_code) = generate_parser(grammar_json.as_str()).unwrap();
9495

9596
let language = get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));
9697

0 commit comments

Comments
 (0)