Skip to content

Commit 42bf1b1

Browse files
authored
Merge pull request #1 from tazjin/performance-improvements
Significant performance improvements for large projects: - removed usage of tokio for now (not too much concurency since canonical path building seems to take the bulk of the time and that operation is not async) - made canonical path resolution be cached as it is expensive
2 parents a2bd345 + 99d88d7 commit 42bf1b1

File tree

11 files changed

+401
-516
lines changed

11 files changed

+401
-516
lines changed

Cargo.lock

Lines changed: 251 additions & 396 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ path="src/lib.rs"
1414
crate-type = ["lib"]
1515

1616
[dependencies]
17-
tokio = { version = "1", features = ["full"]}
1817
thiserror = "1"
1918
tracing = { version = "0.1"}
2019
shlex = { version = "1.3.0"}
@@ -38,3 +37,10 @@ camino = "1.1.6"
3837
tera = "1.19.1"
3938
eyre = "0.6.12"
4039
color-eyre = "0.6.2"
40+
41+
# Add a profile to all targets that enables release optimisations, but
42+
# retains debug symbols. This is great for use with
43+
# benchmarking/profiling tools.
44+
[profile.release-with-debug]
45+
inherits = "release"
46+
debug = true

src/dependencies.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
pub mod canonicalize;
12
pub mod compiledb;
23
pub mod configfile;
34
pub mod cparse;

src/dependencies/canonicalize.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
use std::borrow::Borrow;
2+
use std::collections::HashMap;
3+
use std::fs::canonicalize;
4+
use std::hash::Hash;
5+
use std::path::{Path, PathBuf};
6+
use std::sync::{Arc, LazyLock, RwLock};
7+
8+
static PATH_CACHE: LazyLock<Arc<RwLock<HashMap<PathBuf, Option<PathBuf>>>>> =
9+
LazyLock::new(|| Default::default());
10+
11+
/// Wrapper around [`std::fs::canonicalize`] that caches already canonicalized
12+
/// entries globally (often the case when dealing with includes).
13+
///
14+
/// This is done due to the operation being inherently slow: It has to walk the
15+
/// entire parent directory tree (especially expensive on FUSE-mounted virtual
16+
/// repository filesystems).
17+
pub fn canonicalize_cached<P>(path: P) -> Result<Option<PathBuf>, std::io::Error>
18+
where
19+
P: AsRef<Path>,
20+
PathBuf: Borrow<P>,
21+
P: Hash + Eq,
22+
{
23+
{
24+
// First, try the cache ...
25+
let cache = PATH_CACHE.read().unwrap();
26+
if let Some(cached) = cache.get(&path) {
27+
return Ok(cached.clone());
28+
}
29+
}
30+
31+
// ... then look it up ourselves.
32+
let result = if path.as_ref().exists() {
33+
Some(canonicalize(&path)?)
34+
} else {
35+
None
36+
};
37+
38+
let mut cache = PATH_CACHE.write().unwrap();
39+
cache.insert(path.as_ref().to_path_buf(), result.clone());
40+
41+
Ok(result)
42+
}

src/dependencies/compiledb.rs

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
use serde::{Deserialize, Serialize};
2-
use tokio::{fs::File, io::AsyncReadExt as _};
3-
use tracing::debug;
4-
2+
use std::fs::File;
3+
use std::io::Read;
54
use std::path::PathBuf;
5+
use tracing::debug;
66

7-
use crate::dependencies::error::Error;
7+
use super::canonicalize::canonicalize_cached;
8+
use super::error::Error;
89

910
#[derive(Debug, PartialEq, PartialOrd, Hash, Serialize, Deserialize)]
1011
pub struct SourceFileEntry {
@@ -45,11 +46,12 @@ impl TryFrom<CompileCommandsEntry> for SourceFileEntry {
4546
source_file
4647
};
4748

48-
let file_path = file_path.canonicalize().map_err(|source| Error::IOError {
49-
source,
50-
path: file_path.clone(),
51-
message: "canonicalize",
52-
})?;
49+
let file_path = canonicalize_cached(file_path)
50+
.map_err(|source| Error::IOError {
51+
source,
52+
message: "canonicalize",
53+
})?
54+
.ok_or(Error::FileNotFound)?;
5355

5456
let args = value
5557
.arguments
@@ -61,7 +63,7 @@ impl TryFrom<CompileCommandsEntry> for SourceFileEntry {
6163
.map(PathBuf::from)
6264
.filter_map(|p| {
6365
if p.is_relative() {
64-
start_dir.join(p).canonicalize().ok()
66+
canonicalize_cached(start_dir.join(p)).ok()?
6567
} else {
6668
Some(p)
6769
}
@@ -75,17 +77,16 @@ impl TryFrom<CompileCommandsEntry> for SourceFileEntry {
7577
}
7678
}
7779

78-
pub async fn parse_compile_database(path: &str) -> Result<Vec<SourceFileEntry>, Error> {
79-
let mut file = File::open(path).await.map_err(|source| Error::IOError {
80+
pub fn parse_compile_database(path: &str) -> Result<Vec<SourceFileEntry>, Error> {
81+
let mut file = File::open(path).map_err(|source| Error::FileIOError {
8082
source,
8183
path: path.into(),
8284
message: "open",
8385
})?;
8486
let mut json_string = String::new();
8587

8688
file.read_to_string(&mut json_string)
87-
.await
88-
.map_err(|source| Error::IOError {
89+
.map_err(|source| Error::FileIOError {
8990
source,
9091
path: path.into(),
9192
message: "read_to_string",

src/dependencies/configfile.rs

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ struct ConfigurationFile {
127127
/// What inputs are to be processed
128128
input_commands: Vec<InputCommand>,
129129

130+
/// Should symlinks be resolved, or left alone? Enabling symlink resolution
131+
/// can be significantly slower on large code bases.
132+
130133
/// Instructions to build a braph
131134
graph: GraphInstructions,
132135
}
@@ -756,7 +759,7 @@ fn parse_config(input: &str) -> IResult<&str, ConfigurationFile> {
756759
.parse(input)
757760
}
758761

759-
pub async fn build_graph(input: &str) -> Result<Graph, Report> {
762+
pub fn build_graph(input: &str) -> Result<Graph, Report> {
760763
let (input, config) = parse_config(input)
761764
.map_err(|e| Error::ConfigParseError {
762765
message: format!("Nom error: {:?}", e),
@@ -783,7 +786,7 @@ pub async fn build_graph(input: &str) -> Result<Graph, Report> {
783786
load_include_directories,
784787
load_sources,
785788
} => {
786-
let entries = match parse_compile_database(&path).await {
789+
let entries = match parse_compile_database(&path) {
787790
Ok(entries) => entries,
788791
Err(err) => {
789792
error!("Error parsing compile database {}: {:?}", path, err);
@@ -817,7 +820,7 @@ pub async fn build_graph(input: &str) -> Result<Graph, Report> {
817820
.into_iter()
818821
.collect::<Vec<_>>();
819822
for entry in entries {
820-
match extract_includes(&entry.file_path, &includes_array).await {
823+
match extract_includes(&entry.file_path, &includes_array) {
821824
Ok(includes) => {
822825
info!(target: "compile-db", "Loaded {:?} with includes {:#?}", &entry.file_path, includes);
823826
dependency_data.files.push(SourceWithIncludes {
@@ -851,7 +854,7 @@ pub async fn build_graph(input: &str) -> Result<Graph, Report> {
851854
.clone()
852855
.into_iter()
853856
.collect::<Vec<_>>();
854-
match all_sources_and_includes(glob, &includes_array).await {
857+
match all_sources_and_includes(glob, &includes_array) {
855858
Ok(data) => {
856859
if data.is_empty() {
857860
error!("GLOB {:?} resulted in EMPTY file list!", g);
@@ -926,12 +929,7 @@ pub async fn build_graph(input: &str) -> Result<Graph, Report> {
926929
target,
927930
source_root,
928931
ignore_targets,
929-
} => match load_gn_targets(
930-
&PathBuf::from(gn_root),
931-
&PathBuf::from(source_root),
932-
&target,
933-
)
934-
.await
932+
} => match load_gn_targets(PathBuf::from(gn_root), PathBuf::from(source_root), &target)
935933
{
936934
Ok(targets) => g.add_groups_from_gn(targets, ignore_targets),
937935
Err(e) => error!("Failed to load GN targets: {:?}", e),
@@ -1111,7 +1109,7 @@ mod tests {
11111109
graph {
11121110
map {
11131111
}
1114-
1112+
11151113
group {
11161114
gn root test1 target //my/target/* sources srcs1
11171115
gn root test/${Foo}/blah target //* sources ${Foo} ignore targets {

src/dependencies/cparse.rs

Lines changed: 31 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,21 @@
1+
use super::canonicalize::canonicalize_cached;
12
use super::error::Error;
23

34
use regex::Regex;
45
use std::{
56
fmt::Debug,
6-
path::{Path, PathBuf},
7-
sync::Arc,
8-
};
9-
use tokio::{
107
fs::File,
11-
io::{AsyncBufReadExt as _, BufReader},
12-
task::JoinSet,
8+
io::{BufRead, BufReader},
9+
path::{Path, PathBuf},
10+
sync::{Arc, LazyLock},
11+
thread,
1312
};
1413
use tracing::{error, info, trace};
1514

1615
/// Attempt to make the full path of head::tail
1716
/// returns None if that fails (e.g. path does not exist)
18-
fn try_resolve(head: &Path, tail: &PathBuf) -> Option<PathBuf> {
19-
head.join(tail).canonicalize().ok()
17+
fn try_resolve(head: &Path, tail: &Path) -> Option<PathBuf> {
18+
canonicalize_cached(head.join(tail)).ok()?
2019
}
2120

2221
#[derive(PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
@@ -41,41 +40,33 @@ impl FileType {
4140
}
4241
}
4342

43+
static INCLUDE_REGEX: LazyLock<Regex> =
44+
LazyLock::new(|| Regex::new(r##"^\s*#include\s*(["<])([^">]*)[">]"##).unwrap());
45+
4446
/// Given a C-like source, try to resolve includes.
4547
///
4648
/// Includes are generally of the form `#include <name>` or `#include "name"`
47-
pub async fn extract_includes(
48-
path: &PathBuf,
49-
include_dirs: &[PathBuf],
50-
) -> Result<Vec<PathBuf>, Error> {
51-
let f = File::open(path).await.map_err(|source| Error::IOError {
49+
pub fn extract_includes(path: &PathBuf, include_dirs: &[PathBuf]) -> Result<Vec<PathBuf>, Error> {
50+
let f = File::open(path).map_err(|source| Error::FileIOError {
5251
source,
5352
path: path.clone(),
5453
message: "open",
5554
})?;
5655

5756
let reader = BufReader::new(f);
58-
59-
let inc_re = Regex::new(r##"^\s*#include\s*(["<])([^">]*)[">]"##).unwrap();
60-
6157
let mut result = Vec::new();
6258
let parent_dir = PathBuf::from(path.parent().unwrap());
6359

64-
let mut lines = reader.lines();
60+
let lines = reader.lines();
6561

66-
loop {
67-
let line = lines.next_line().await.map_err(|source| Error::IOError {
62+
for line in lines {
63+
let line = line.map_err(|source| Error::FileIOError {
6864
source,
6965
path: path.clone(),
7066
message: "line read",
7167
})?;
7268

73-
let line = match line {
74-
Some(value) => value,
75-
None => break,
76-
};
77-
78-
if let Some(captures) = inc_re.captures(&line) {
69+
if let Some(captures) = INCLUDE_REGEX.captures(&line) {
7970
let inc_type = captures.get(1).unwrap().as_str();
8071
let relative_path = PathBuf::from(captures.get(2).unwrap().as_str());
8172

@@ -115,7 +106,7 @@ pub struct SourceWithIncludes {
115106
}
116107

117108
/// Given a list of paths, figure out their dependencies
118-
pub async fn all_sources_and_includes<I, E>(
109+
pub fn all_sources_and_includes<I, E>(
119110
paths: I,
120111
includes: &[PathBuf],
121112
) -> Result<Vec<SourceWithIncludes>, Error>
@@ -124,14 +115,15 @@ where
124115
E: Debug,
125116
{
126117
let includes = Arc::new(Vec::from(includes));
127-
128-
let mut join_set = JoinSet::new();
118+
let mut handles = Vec::new();
129119

130120
for entry in paths {
131121
let path = match entry {
132-
Ok(value) => value.canonicalize().map_err(|e| Error::Internal {
133-
message: format!("{:?}", e),
134-
})?,
122+
Ok(value) => canonicalize_cached(value)
123+
.map_err(|e| Error::Internal {
124+
message: format!("{:?}", e),
125+
})?
126+
.ok_or(Error::FileNotFound)?,
135127
Err(e) => {
136128
return Err(Error::Internal {
137129
message: format!("{:?}", e),
@@ -144,27 +136,27 @@ where
144136
continue;
145137
}
146138

147-
// prepare data to mve into sub-task
139+
// prepare data to move into sub-task
148140
let includes = includes.clone();
149141

150-
join_set.spawn(async move {
142+
handles.push(thread::spawn(move || {
151143
trace!("PROCESS: {:?}", path);
152-
let includes = match extract_includes(&path, &includes).await {
144+
let includes = match extract_includes(&path, &includes) {
153145
Ok(value) => value,
154146
Err(e) => {
155-
error!("Error extracing includes: {:?}", e);
147+
error!("Error extracting includes: {:?}", e);
156148
return Err(e);
157149
}
158150
};
159151

160152
Ok(SourceWithIncludes { path, includes })
161-
});
153+
}));
162154
}
163155

164156
let mut results = Vec::new();
165-
while let Some(h) = join_set.join_next().await {
166-
let r = h.map_err(Error::JoinError)?;
167-
results.push(r?)
157+
for handle in handles {
158+
let res = handle.join().map_err(|_| Error::JoinError)?;
159+
results.push(res?);
168160
}
169161

170162
Ok(results)

src/dependencies/error.rs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,19 @@
11
use std::path::PathBuf;
22

3-
use tokio::task::JoinError;
4-
53
#[derive(thiserror::Error, Debug)]
64
pub enum Error {
7-
#[error("I/O error at path {}: {}", path.to_string_lossy(), message)]
5+
#[error("I/O error: {}", message)]
86
IOError {
97
#[source]
108
source: std::io::Error,
11-
path: PathBuf,
129
message: &'static str,
1310
},
1411

15-
#[error("I/O error: {}", message)]
16-
AsyncIOError {
12+
#[error("I/O error at path {}: {}", path.to_string_lossy(), message)]
13+
FileIOError {
1714
#[source]
18-
source: tokio::io::Error,
15+
source: std::io::Error,
16+
path: PathBuf,
1917
message: &'static str,
2018
},
2119

@@ -25,8 +23,16 @@ pub enum Error {
2523
#[error("Failed to parse JSON")]
2624
JsonParseError(serde_json::Error),
2725

26+
// std::thread panics do not return printable values by default, and while
27+
// it might be possible to upcast to a `dyn Debug`, it is hardly worth it
28+
// here.
2829
#[error("Subtask join error")]
29-
JoinError(JoinError),
30+
JoinError,
31+
32+
// Unfortunately, in most cases where this error can occur the path is no
33+
// longer available to avoid unnecessary cloning in the hot path.
34+
#[error("Required file not found")]
35+
FileNotFound,
3036

3137
#[error("Internal error")]
3238
Internal { message: String },

0 commit comments

Comments
 (0)