Skip to content
This repository was archived by the owner on Sep 9, 2025. It is now read-only.

Commit 21a916a

Browse files
author
Hendrik van Antwerpen
committed
Access content lazily instead of requiring an eager value
1 parent 4f90a29 commit 21a916a

File tree

6 files changed

+102
-64
lines changed

6 files changed

+102
-64
lines changed

tree-sitter-stack-graphs/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313

1414
- A new `CancelAfterDuration` implementation of `CancellationFlag` that cancels the computation after a certain amount of time.
1515

16+
#### Changed
17+
18+
- The `LanguageConfiguration::matches_file` method now takes a `&mut dyn ContentProvider` instead of an `Option<&str>` value. This allows the file to be read lazily, *after* the filename has been checked, instead of requiring the content to be loaded unconditionally up front. The reference is mutable so that content providers can cache the content they read. The return type has changed to `std::io::Result<bool>` so that errors from content providers are propagated to the caller. The `Loader::load_for_file` and `Loader::load_tree_sitter_language_for_file` methods take a `ContentProvider` in the same way. A `FileReader` implementation that caches the most recently read file is provided as well.
19+
1620
### CLI
1721

1822
#### Added

tree-sitter-stack-graphs/src/cli/analyze.rs

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use walkdir::WalkDir;
2323
use crate::cli::util::duration_from_seconds_str;
2424
use crate::cli::util::map_parse_errors;
2525
use crate::cli::util::path_exists;
26+
use crate::loader::FileReader;
2627
use crate::loader::Loader;
2728
use crate::CancelAfterDuration;
2829
use crate::CancellationFlag;
@@ -77,24 +78,11 @@ impl AnalyzeArgs {
7778
.filter(|e| e.file_type().is_file())
7879
{
7980
let source_path = source_entry.path();
80-
if let Err(e) = self.analyze_file_with_context(source_root, source_path, loader)
81-
{
82-
eprintln!(
83-
"Skipping file {} due to analysis failure {}",
84-
source_path.display(),
85-
e.to_string()
86-
);
87-
}
81+
self.analyze_file_with_context(source_root, source_path, loader)?;
8882
}
8983
} else {
9084
let source_root = source_path.parent().unwrap();
91-
if let Err(e) = self.analyze_file_with_context(source_root, source_path, loader) {
92-
eprintln!(
93-
"Skipping file {} due to analysis failure {}",
94-
source_path.display(),
95-
e.to_string()
96-
);
97-
};
85+
self.analyze_file_with_context(source_root, source_path, loader)?;
9886
}
9987
}
10088
Ok(())
@@ -119,8 +107,8 @@ impl AnalyzeArgs {
119107
) -> anyhow::Result<()> {
120108
let mut file_status = FileStatusLogger::new(source_path, self.verbose);
121109

122-
let source = std::fs::read_to_string(source_path)?;
123-
let lc = match loader.load_for_file(source_path, Some(&source), &NoCancellation) {
110+
let mut file_reader = FileReader::new();
111+
let lc = match loader.load_for_file(source_path, &mut file_reader, &NoCancellation) {
124112
Ok(Some(sgl)) => sgl,
125113
Ok(None) => return Ok(()),
126114
Err(crate::loader::LoadError::Cancelled(_)) => {
@@ -129,6 +117,7 @@ impl AnalyzeArgs {
129117
}
130118
Err(e) => return Err(e.into()),
131119
};
120+
let source = file_reader.get(source_path)?;
132121

133122
let mut cancellation_flag: Arc<dyn CancellationFlag> = Arc::new(NoCancellation);
134123
if let Some(max_file_time) = self.max_file_time {

tree-sitter-stack-graphs/src/cli/parse.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use tree_sitter::Parser;
1515
use tree_sitter_graph::parse_error::ParseError;
1616

1717
use crate::cli::util::path_exists;
18+
use crate::loader::FileReader;
1819
use crate::loader::Loader;
1920
use crate::LoadError;
2021

@@ -34,11 +35,12 @@ impl ParseArgs {
3435
}
3536

3637
fn parse_file(&self, file_path: &Path, loader: &mut Loader) -> anyhow::Result<()> {
37-
let source = std::fs::read_to_string(file_path)?;
38-
let lang = match loader.load_tree_sitter_language_for_file(file_path, Some(&source))? {
38+
let mut file_reader = FileReader::new();
39+
let lang = match loader.load_tree_sitter_language_for_file(file_path, &mut file_reader)? {
3940
Some(sgl) => sgl,
4041
None => return Err(anyhow!("No stack graph language found")),
4142
};
43+
let source = file_reader.get(file_path)?;
4244

4345
let mut parser = Parser::new();
4446
parser.set_language(lang)?;

tree-sitter-stack-graphs/src/cli/test.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use walkdir::WalkDir;
2424
use crate::cli::util::map_parse_errors;
2525
use crate::cli::util::path_exists;
2626
use crate::cli::util::PathSpec;
27+
use crate::loader::FileReader;
2728
use crate::loader::LanguageConfiguration;
2829
use crate::loader::Loader;
2930
use crate::test::Test;
@@ -193,8 +194,8 @@ impl TestArgs {
193194
test_path: &Path,
194195
loader: &mut Loader,
195196
) -> anyhow::Result<TestResult> {
196-
let source = std::fs::read_to_string(test_path)?;
197-
let lc = match loader.load_for_file(test_path, Some(&source), &NoCancellation)? {
197+
let mut file_reader = FileReader::new();
198+
let lc = match loader.load_for_file(test_path, &mut file_reader, &NoCancellation)? {
198199
Some(sgl) => sgl,
199200
None => {
200201
if self.show_ignored {
@@ -203,6 +204,7 @@ impl TestArgs {
203204
return Ok(TestResult::new());
204205
}
205206
};
207+
let source = file_reader.get(test_path)?;
206208
let default_fragment_path = test_path.strip_prefix(test_root).unwrap();
207209
let mut test = Test::from_source(&test_path, &source, default_fragment_path)?;
208210
self.load_builtins_into(&lc, &mut test.graph)
@@ -224,7 +226,10 @@ impl TestArgs {
224226
&test_fragment.globals,
225227
&NoCancellation,
226228
)?;
227-
} else if lc.matches_file(&test_fragment.path, Some(&test_fragment.source)) {
229+
} else if lc.matches_file(
230+
&test_fragment.path,
231+
&mut Some(test_fragment.source.as_ref()),
232+
)? {
228233
globals.clear();
229234
test_fragment.add_globals_to(&mut globals);
230235
self.build_fragment_stack_graph_into(

tree-sitter-stack-graphs/src/loader.rs

Lines changed: 78 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,12 @@ impl LanguageConfiguration {
8686
})
8787
}
8888

89-
pub fn matches_file(&self, path: &Path, content: Option<&str>) -> bool {
90-
matches_file(&self.file_types, &self.content_regex, path, content).is_some()
89+
pub fn matches_file(
90+
&self,
91+
path: &Path,
92+
content: &mut dyn ContentProvider,
93+
) -> std::io::Result<bool> {
94+
matches_file(&self.file_types, &self.content_regex, path, content).map(|l| l.is_some())
9195
}
9296
}
9397

@@ -205,7 +209,7 @@ impl Loader {
205209
pub fn load_tree_sitter_language_for_file(
206210
&mut self,
207211
path: &Path,
208-
content: Option<&str>,
212+
content: &mut dyn ContentProvider,
209213
) -> Result<Option<tree_sitter::Language>, LoadError> {
210214
match &mut self.0 {
211215
LoaderImpl::Paths(loader) => loader.load_tree_sitter_language_for_file(path, content),
@@ -219,7 +223,7 @@ impl Loader {
219223
pub fn load_for_file(
220224
&mut self,
221225
path: &Path,
222-
content: Option<&str>,
226+
content: &mut dyn ContentProvider,
223227
cancellation_flag: &dyn CancellationFlag,
224228
) -> Result<Option<&LanguageConfiguration>, LoadError> {
225229
match &mut self.0 {
@@ -328,34 +332,28 @@ impl LanguageConfigurationsLoader {
328332
pub fn load_tree_sitter_language_for_file(
329333
&mut self,
330334
path: &Path,
331-
content: Option<&str>,
335+
content: &mut dyn ContentProvider,
332336
) -> Result<Option<tree_sitter::Language>, LoadError> {
333-
let configuration = match self
334-
.configurations
335-
.iter()
336-
.find(|l| l.matches_file(path, content))
337-
{
338-
Some(language) => language,
339-
None => return Ok(None),
340-
};
341-
Ok(Some(configuration.language))
337+
for configuration in self.configurations.iter() {
338+
if configuration.matches_file(path, content)? {
339+
return Ok(Some(configuration.language));
340+
}
341+
}
342+
Ok(None)
342343
}
343344

344345
/// Load a stack graph language for the given file. Loading is based on the loader configuration and the given file path.
345346
pub fn load_for_file(
346347
&mut self,
347348
path: &Path,
348-
content: Option<&str>,
349+
content: &mut dyn ContentProvider,
349350
) -> Result<Option<&LanguageConfiguration>, LoadError> {
350-
let language = match self
351-
.configurations
352-
.iter()
353-
.find(|l| l.matches_file(path, content))
354-
{
355-
Some(language) => language,
356-
None => return Ok(None),
357-
};
358-
Ok(Some(language))
351+
for language in self.configurations.iter() {
352+
if language.matches_file(path, content)? {
353+
return Ok(Some(language));
354+
}
355+
}
356+
Ok(None)
359357
}
360358
}
361359

@@ -400,7 +398,7 @@ impl PathLoader {
400398
pub fn load_tree_sitter_language_for_file(
401399
&mut self,
402400
path: &Path,
403-
content: Option<&str>,
401+
content: &mut dyn ContentProvider,
404402
) -> Result<Option<tree_sitter::Language>, LoadError> {
405403
if let Some(selected_language) = self.select_language_for_file(path, content)? {
406404
return Ok(Some(selected_language.language));
@@ -411,7 +409,7 @@ impl PathLoader {
411409
pub fn load_for_file(
412410
&mut self,
413411
path: &Path,
414-
content: Option<&str>,
412+
content: &mut dyn ContentProvider,
415413
cancellation_flag: &dyn CancellationFlag,
416414
) -> Result<Option<&LanguageConfiguration>, LoadError> {
417415
let selected_language = self.select_language_for_file(path, content)?;
@@ -457,7 +455,7 @@ impl PathLoader {
457455
fn select_language_for_file(
458456
&mut self,
459457
file_path: &Path,
460-
file_content: Option<&str>,
458+
file_content: &mut dyn ContentProvider,
461459
) -> Result<Option<&SupplementedLanguage>, LoadError> {
462460
// The borrow checker is not smart enough to realize that the early returns
463461
// ensure any references from the self.select_* call (which require a mutable
@@ -494,7 +492,7 @@ impl PathLoader {
494492
&mut self,
495493
language_path: &Path,
496494
file_path: &Path,
497-
file_content: Option<&str>,
495+
file_content: &mut dyn ContentProvider,
498496
) -> Result<Option<&SupplementedLanguage>, LoadError> {
499497
let scope = self.scope.as_deref();
500498
let languages = self.loader.languages_at_path(language_path, scope)?;
@@ -506,7 +504,7 @@ impl PathLoader {
506504
)));
507505
}
508506
if let Some(language) =
509-
SupplementedLanguage::best_for_file(languages, file_path, file_content)
507+
SupplementedLanguage::best_for_file(languages, file_path, file_content)?
510508
{
511509
return Ok(Some(language));
512510
};
@@ -641,27 +639,31 @@ impl SupplementedLanguage {
641639
}
642640

643641
// Extracted from tree_sitter_loader::Loader::language_configuration_for_file_name
644-
pub fn matches_file(&self, path: &Path, content: Option<&str>) -> Option<isize> {
642+
pub fn matches_file(
643+
&self,
644+
path: &Path,
645+
content: &mut dyn ContentProvider,
646+
) -> std::io::Result<Option<isize>> {
645647
matches_file(&self.file_types, &self.content_regex, path, content)
646648
}
647649

648650
// Extracted from tree_sitter_loader::Loader::language_configuration_for_file_name
649651
pub fn best_for_file<'a>(
650652
languages: Vec<&'a SupplementedLanguage>,
651653
path: &Path,
652-
content: Option<&str>,
653-
) -> Option<&'a SupplementedLanguage> {
654+
content: &mut dyn ContentProvider,
655+
) -> std::io::Result<Option<&'a SupplementedLanguage>> {
654656
let mut best_score = -1isize;
655657
let mut best = None;
656658
for language in languages {
657-
if let Some(score) = language.matches_file(path, content) {
659+
if let Some(score) = language.matches_file(path, content)? {
658660
if score > best_score {
659661
best_score = score;
660662
best = Some(language);
661663
}
662664
}
663665
}
664-
best
666+
Ok(best)
665667
}
666668
}
667669

@@ -682,28 +684,64 @@ pub fn matches_file(
682684
file_types: &Vec<String>,
683685
content_regex: &Option<Regex>,
684686
path: &Path,
685-
content: Option<&str>,
686-
) -> Option<isize> {
687+
content: &mut dyn ContentProvider,
688+
) -> std::io::Result<Option<isize>> {
687689
// Check path extension
688690
if !path
689691
.extension()
690692
.and_then(OsStr::to_str)
691693
.map_or(false, |ext| file_types.iter().any(|ft| ft == ext))
692694
{
693-
return None;
695+
return Ok(None);
694696
}
695697

696698
// Apply content regex
699+
let content = content.get(path)?;
697700
if let (Some(file_content), Some(content_regex)) = (content, &content_regex) {
698701
// If the language configuration has a content regex, assign
699702
// a score based on the length of the first match.
700703
if let Some(mat) = content_regex.find(&file_content) {
701704
let score = (mat.end() - mat.start()) as isize;
702-
return Some(score);
705+
return Ok(Some(score));
703706
} else {
704-
return None;
707+
return Ok(None);
705708
}
706709
}
707710

708-
Some(0isize)
711+
Ok(Some(0isize))
712+
}
713+
714+
pub trait ContentProvider {
715+
fn get(&mut self, path: &Path) -> std::io::Result<Option<&str>>;
716+
}
717+
718+
/// FileReader reads files from the filesystem and caches the most recently read file.
719+
pub struct FileReader {
720+
cache: Option<(PathBuf, String)>,
721+
}
722+
723+
impl FileReader {
724+
pub fn new() -> Self {
725+
Self { cache: None }
726+
}
727+
728+
pub fn get(&mut self, path: &Path) -> std::io::Result<&str> {
729+
if self.cache.as_ref().map_or(true, |(p, _)| p != path) {
730+
let content = std::fs::read_to_string(path)?;
731+
self.cache = Some((path.to_path_buf(), content));
732+
}
733+
Ok(&self.cache.as_ref().unwrap().1)
734+
}
735+
}
736+
737+
impl ContentProvider for FileReader {
738+
fn get(&mut self, path: &Path) -> std::io::Result<Option<&str>> {
739+
self.get(path).map(|c| Some(c))
740+
}
741+
}
742+
743+
impl ContentProvider for Option<&str> {
744+
fn get(&mut self, _path: &Path) -> std::io::Result<Option<&str>> {
745+
Ok(self.clone())
746+
}
709747
}

tree-sitter-stack-graphs/tests/it/loader.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,12 @@ fn can_load_from_provided_language_configuration() {
4040
Loader::from_language_configurations(vec![lc], None).expect("Expected loader to succeed");
4141

4242
let tsl = loader
43-
.load_tree_sitter_language_for_file(&PATH, None)
43+
.load_tree_sitter_language_for_file(&PATH, &mut None)
4444
.expect("Expected loading tree-sitter language to succeed");
4545
assert_eq!(tsl, Some(language));
4646

4747
let lc = loader
48-
.load_for_file(&PATH, None, &NoCancellation)
48+
.load_for_file(&PATH, &mut None, &NoCancellation)
4949
.expect("Expected loading stack graph language to succeed");
5050
assert_eq!(lc.map(|lc| lc.language), Some(language));
5151
}

0 commit comments

Comments
 (0)