Skip to content

Commit 99d88d7

Browse files
committed
dependencies: cache file existence when canonicalizing
After caching canonicalization, the calls to std::fs::exists were making up a large portion of the remaining syscalls. Caching these, too, yields another 30-40% speedup. After this commit, on large projects, the hash table for the cache itself dominates performance measurements, which is about as far as I'm willing to go for now.
1 parent 815b679 commit 99d88d7

File tree

5 files changed

+39
-24
lines changed

5 files changed

+39
-24
lines changed

src/dependencies/canonicalize.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::hash::Hash;
55
use std::path::{Path, PathBuf};
66
use std::sync::{Arc, LazyLock, RwLock};
77

8-
static PATH_CACHE: LazyLock<Arc<RwLock<HashMap<PathBuf, PathBuf>>>> =
8+
static PATH_CACHE: LazyLock<Arc<RwLock<HashMap<PathBuf, Option<PathBuf>>>>> =
99
LazyLock::new(|| Default::default());
1010

1111
/// Wrapper around [`std::fs::canonicalize`] that caches already canonicalized
@@ -14,7 +14,7 @@ static PATH_CACHE: LazyLock<Arc<RwLock<HashMap<PathBuf, PathBuf>>>> =
1414
/// This is done due to the operation being inherently slow: It has to walk the
1515
/// entire parent directory tree (especially expensive on FUSE-mounted virtual
1616
/// repository filesystems).
17-
pub fn canonicalize_cached<P>(path: P) -> Result<PathBuf, std::io::Error>
17+
pub fn canonicalize_cached<P>(path: P) -> Result<Option<PathBuf>, std::io::Error>
1818
where
1919
P: AsRef<Path>,
2020
PathBuf: Borrow<P>,
@@ -29,7 +29,12 @@ where
2929
}
3030

3131
// ... then look it up ourselves.
32-
let result = canonicalize(&path)?;
32+
let result = if path.as_ref().exists() {
33+
Some(canonicalize(&path)?)
34+
} else {
35+
None
36+
};
37+
3338
let mut cache = PATH_CACHE.write().unwrap();
3439
cache.insert(path.as_ref().to_path_buf(), result.clone());
3540

src/dependencies/compiledb.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,12 @@ impl TryFrom<CompileCommandsEntry> for SourceFileEntry {
4646
source_file
4747
};
4848

49-
let file_path = canonicalize_cached(file_path).map_err(|source| Error::IOError {
50-
source,
51-
message: "canonicalize",
52-
})?;
49+
let file_path = canonicalize_cached(file_path)
50+
.map_err(|source| Error::IOError {
51+
source,
52+
message: "canonicalize",
53+
})?
54+
.ok_or(Error::FileNotFound)?;
5355

5456
let args = value
5557
.arguments
@@ -61,7 +63,7 @@ impl TryFrom<CompileCommandsEntry> for SourceFileEntry {
6163
.map(PathBuf::from)
6264
.filter_map(|p| {
6365
if p.is_relative() {
64-
canonicalize_cached(start_dir.join(p)).ok()
66+
canonicalize_cached(start_dir.join(p)).ok()?
6567
} else {
6668
Some(p)
6769
}

src/dependencies/cparse.rs

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,7 @@ use tracing::{error, info, trace};
1515
/// Attempt to make the full path of head::tail
1616
/// returns None if that fails (e.g. path does not exist)
1717
fn try_resolve(head: &Path, tail: &Path) -> Option<PathBuf> {
18-
let path = head.join(tail);
19-
if !path.exists() {
20-
return None;
21-
}
22-
23-
canonicalize_cached(head.join(tail)).ok()
18+
canonicalize_cached(head.join(tail)).ok()?
2419
}
2520

2621
#[derive(PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
@@ -124,9 +119,11 @@ where
124119

125120
for entry in paths {
126121
let path = match entry {
127-
Ok(value) => canonicalize_cached(value).map_err(|e| Error::Internal {
128-
message: format!("{:?}", e),
129-
})?,
122+
Ok(value) => canonicalize_cached(value)
123+
.map_err(|e| Error::Internal {
124+
message: format!("{:?}", e),
125+
})?
126+
.ok_or(Error::FileNotFound)?,
130127
Err(e) => {
131128
return Err(Error::Internal {
132129
message: format!("{:?}", e),

src/dependencies/error.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ pub enum Error {
2929
#[error("Subtask join error")]
3030
JoinError,
3131

32+
// Unfortunately, in most cases where this error can occur the path is no
33+
// longer available to avoid unnecessary cloning in the hot path.
34+
#[error("Required file not found")]
35+
FileNotFound,
36+
3237
#[error("Internal error")]
3338
Internal { message: String },
3439

src/dependencies/gn.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,22 @@ pub fn load_gn_targets(
3030
) -> Result<Vec<GnTarget>, Error> {
3131
// TODO: GN PATH?
3232
let mut command = Command::new("/usr/bin/gn");
33-
let source_root = canonicalize_cached(source_root).map_err(|e| Error::Internal {
34-
message: format!("Canonical path: {:?}", e),
35-
})?;
33+
let source_root = canonicalize_cached(source_root)
34+
.map_err(|e| Error::Internal {
35+
message: format!("Canonical path: {:?}", e),
36+
})?
37+
.ok_or(Error::FileNotFound)?;
3638

3739
command.arg("desc");
3840
command.arg("--format=json");
3941
command.arg(format!("--root={}", source_root.to_string_lossy()));
40-
command.arg(canonicalize_cached(gn_dir).map_err(|e| Error::Internal {
41-
message: format!("Canonical path: {:?}", e),
42-
})?);
42+
command.arg(
43+
canonicalize_cached(gn_dir)
44+
.map_err(|e| Error::Internal {
45+
message: format!("Canonical path: {:?}", e),
46+
})?
47+
.ok_or(Error::FileNotFound)?,
48+
);
4349
command.arg(target);
4450
command.arg("sources");
4551

@@ -93,7 +99,7 @@ pub fn load_gn_targets(
9399
// otherwise assume absolute and use as-is
94100
PathBuf::from(&s.as_str())
95101
};
96-
canonicalize_cached(p).ok()
102+
canonicalize_cached(p).ok()?
97103
})
98104
.inspect(|path| {
99105
info!(target: "gn-path", " - {:?}", path);

0 commit comments

Comments
 (0)