Skip to content

Commit 9b65696

Browse files
authored
Merge pull request #2231 from Kobzol/clean-cache-dir
Purge toolchain cache directory if it gets too large
2 parents 8f395d7 + 2c12d14 commit 9b65696

File tree

2 files changed

+62
-32
lines changed

2 files changed

+62
-32
lines changed

collector/src/bin/collector.rs

Lines changed: 55 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,13 @@ use database::{
6565
CommitType, Connection, Pool,
6666
};
6767

68+
/// Directory used to cache downloaded Rust toolchains on disk.
6869
const TOOLCHAIN_CACHE_DIRECTORY: &str = "cache";
6970

71+
/// Maximum allowed number of toolchains in the toolchain cache directory.
72+
/// If the directory contains more toolchains than this, it is purged.
73+
const TOOLCHAIN_CACHE_MAX_TOOLCHAINS: usize = 30;
74+
7075
fn n_normal_benchmarks_remaining(n: usize) -> String {
7176
let suffix = if n == 1 { "" } else { "s" };
7277
format!("{n} normal benchmark{suffix} remaining")
@@ -1269,15 +1274,8 @@ fn main_result() -> anyhow::Result<i32> {
12691274
}
12701275

12711276
Commands::InstallNext { codegen_backends } => {
1272-
let last_sha = Command::new("git")
1273-
.arg("ls-remote")
1274-
.arg("https://github.com/rust-lang/rust.git")
1275-
.arg("master")
1276-
.output()
1277-
.unwrap();
1278-
let last_sha = String::from_utf8(last_sha.stdout).expect("utf8");
1279-
let last_sha = last_sha.split_whitespace().next().expect(&last_sha);
1280-
let commit = get_commit_or_fake_it(last_sha).expect("success");
1277+
let last_sha = get_latest_sha("https://github.com/rust-lang/rust").unwrap();
1278+
let commit = get_commit_or_fake_it(&last_sha).expect("success");
12811279

12821280
let rt = build_async_runtime();
12831281
let mut sysroot = rt
@@ -1434,6 +1432,8 @@ async fn run_job_queue_benchmarks(
14341432
all_compile_benchmarks: Vec<Benchmark>,
14351433
check_git_sha: bool,
14361434
) -> anyhow::Result<()> {
1435+
let _ = tidy_toolchain_cache_dir();
1436+
14371437
let mut last_request_tag = None;
14381438

14391439
while let Some((benchmark_job, artifact_id)) = conn
@@ -1444,20 +1444,25 @@ async fn run_job_queue_benchmarks(
14441444
)
14451445
.await?
14461446
{
1447+
// Are we benchmarking a different benchmark request than in the previous iteration of the
1448+
// loop?
1449+
let is_new_request = last_request_tag.is_some()
1450+
&& last_request_tag.as_deref() != Some(benchmark_job.request_tag());
1451+
if is_new_request {
1452+
let _ = tidy_toolchain_cache_dir();
1453+
}
1454+
14471455
// Here we check if we should update our commit SHA, if rustc-perf has been updated.
14481456
// We only check for updates when we switch *benchmark requests*, not *benchmark jobs*,
14491457
// to avoid changing code in the middle of benchmarking the same request.
14501458
// Note that if an update happens, the job that we have just dequeued will have its deque
14511459
// counter increased. But since updates are relatively rare, that shouldn't be a big deal,
14521460
// it will be dequeued again when the collector starts again.
1453-
if check_git_sha
1454-
&& last_request_tag.is_some()
1455-
&& last_request_tag.as_deref() != Some(benchmark_job.request_tag())
1456-
&& needs_git_update(collector)
1457-
{
1461+
if check_git_sha && is_new_request && needs_git_update(collector) {
14581462
log::warn!("Exiting collector to update itself from git.");
14591463
return Ok(());
14601464
}
1465+
14611466
last_request_tag = Some(benchmark_job.request_tag().to_string());
14621467

14631468
log::info!("Dequeued job {benchmark_job:?}, artifact_id {artifact_id:?}");
@@ -1523,28 +1528,32 @@ async fn run_job_queue_benchmarks(
15231528
Ok(())
15241529
}
15251530

1531+
/// Check the toolchain cache directory and delete it if it grows too large.
1532+
/// Currently, we just assume that "too large" means "has more than `TOOLCHAIN_CACHE_MAX_TOOLCHAINS` toolchains".
1533+
fn tidy_toolchain_cache_dir() -> std::io::Result<()> {
1534+
let dir_count = Path::new(TOOLCHAIN_CACHE_DIRECTORY)
1535+
.read_dir()?
1536+
.filter_map(|e| e.ok())
1537+
.filter_map(|d| d.file_type().ok())
1538+
.filter(|t| t.is_dir())
1539+
.count();
1540+
if dir_count > TOOLCHAIN_CACHE_MAX_TOOLCHAINS {
1541+
log::warn!("Purging toolchain cache directory at {TOOLCHAIN_CACHE_DIRECTORY}");
1542+
// Just remove the whole directory, to avoid having to figure out which toolchains are old
1543+
std::fs::remove_dir_all(TOOLCHAIN_CACHE_DIRECTORY)?;
1544+
}
1545+
Ok(())
1546+
}
1547+
15261548
/// Returns true if the commit SHA of collector does not match the latest commit SHA of the master
15271549
/// branch of https://github.com/rust-lang/rustc-perf.
15281550
fn needs_git_update(collector: &CollectorConfig) -> bool {
15291551
let Some(commit_sha) = collector.commit_sha() else {
15301552
return false;
15311553
};
15321554

1533-
let mut cmd = Command::new("git");
1534-
cmd.arg("ls-remote")
1535-
.arg("https://github.com/rust-lang/rustc-perf")
1536-
.arg("HEAD");
1537-
let upstream_sha = match command_output(&mut cmd) {
1538-
Ok(output) => String::from_utf8(output.stdout)
1539-
.unwrap()
1540-
.split_whitespace()
1541-
.next()
1542-
.unwrap()
1543-
.to_string(),
1544-
Err(error) => {
1545-
log::error!("Cannot determine latest SHA of rustc-perf: {error:?}");
1546-
return false;
1547-
}
1555+
let Ok(upstream_sha) = get_latest_sha("https://github.com/rust-lang/rustc-perf") else {
1556+
return false;
15481557
};
15491558
if commit_sha != upstream_sha {
15501559
log::warn!(
@@ -1556,6 +1565,23 @@ fn needs_git_update(collector: &CollectorConfig) -> bool {
15561565
}
15571566
}
15581567

1568+
/// Returns the latest known SHA of the default branch of the specified `repo`.
1569+
fn get_latest_sha(repo: &str) -> anyhow::Result<String> {
1570+
let mut cmd = Command::new("git");
1571+
cmd.arg("ls-remote").arg(repo).arg("HEAD");
1572+
match command_output(&mut cmd) {
1573+
Ok(output) => Ok(String::from_utf8(output.stdout)?
1574+
.split_whitespace()
1575+
.next()
1576+
.unwrap()
1577+
.to_string()),
1578+
Err(error) => {
1579+
log::error!("Cannot determine latest SHA of {repo}: {error:?}");
1580+
Err(error)
1581+
}
1582+
}
1583+
}
1584+
15591585
/// Error that happened during benchmarking of a job.
15601586
enum BenchmarkJobError {
15611587
/// The error is non-recoverable.
@@ -1606,8 +1632,6 @@ async fn run_benchmark_job(
16061632
};
16071633
// Avoid redownloading the same sysroot multiple times for different jobs, even
16081634
// across collector restarts.
1609-
1610-
// TODO: Periodically clear the cache directory to avoid running out of disk space.
16111635
sysroot.preserve();
16121636
Toolchain::from_sysroot(&sysroot, commit.sha.clone())
16131637
}

collector/src/toolchain.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,13 @@ impl Sysroot {
4343
triple: &str,
4444
backends: &[CodegenBackend],
4545
) -> Result<Self, SysrootDownloadError> {
46-
let cache_directory = cache_directory.join(triple).join(&sha);
46+
// The structure of this directory is load-bearing.
47+
// We use the commit SHA as the top-level key, to have a quick way of estimating how many
48+
// toolchains have been installed in the cache directory.
49+
// We also use a nested directory below the target tuple, because rustc outputs weird things
50+
// when we query it with `--print sysroot` and its sysroot is located in a directory that
51+
// corresponds to a valid target name.
52+
let cache_directory = cache_directory.join(&sha).join(triple).join("toolchain");
4753
fs::create_dir_all(&cache_directory).map_err(|e| SysrootDownloadError::IO(e.into()))?;
4854

4955
let download = SysrootDownload {

0 commit comments

Comments
 (0)