Skip to content

Commit 027ecfb

Browse files
committed
Implement tidying of collector cache directory
1 parent 18f55cf commit 027ecfb

File tree

1 file changed

+34
-7
lines changed

1 file changed

+34
-7
lines changed

collector/src/bin/collector.rs

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,13 @@ use database::{
6565
CommitType, Connection, Pool,
6666
};
6767

68+
/// Directory used to cache downloaded Rust toolchains on disk.
6869
const TOOLCHAIN_CACHE_DIRECTORY: &str = "cache";
6970

71+
/// Maximum allowed number of toolchains in the toolchain cache directory.
72+
/// If the directory contains more toolchains than this, the whole directory is purged.
73+
const TOOLCHAIN_CACHE_MAX_TOOLCHAINS: usize = 30;
74+
7075
fn n_normal_benchmarks_remaining(n: usize) -> String {
7176
let suffix = if n == 1 { "" } else { "s" };
7277
format!("{n} normal benchmark{suffix} remaining")
@@ -1434,6 +1439,8 @@ async fn run_job_queue_benchmarks(
14341439
all_compile_benchmarks: Vec<Benchmark>,
14351440
check_git_sha: bool,
14361441
) -> anyhow::Result<()> {
1442+
let _ = tidy_toolchain_cache_dir();
1443+
14371444
let mut last_request_tag = None;
14381445

14391446
while let Some((benchmark_job, artifact_id)) = conn
@@ -1444,20 +1451,25 @@ async fn run_job_queue_benchmarks(
14441451
)
14451452
.await?
14461453
{
1454+
// Are we benchmarking a different benchmark request than in the previous iteration of the
1455+
// loop?
1456+
let is_new_request = last_request_tag.is_some()
1457+
&& last_request_tag.as_deref() != Some(benchmark_job.request_tag());
1458+
if is_new_request {
1459+
let _ = tidy_toolchain_cache_dir();
1460+
}
1461+
14471462
// Here we check if we should update our commit SHA, if rustc-perf has been updated.
14481463
// We only check for updates when we switch *benchmark requests*, not *benchmark jobs*,
14491464
// to avoid changing code in the middle of benchmarking the same request.
14501465
// Note that if an update happens, the job that we have just dequeued will have its deque
14511466
// counter increased. But since updates are relatively rare, that shouldn't be a big deal,
14521467
// it will be dequeued again when the collector starts again.
1453-
if check_git_sha
1454-
&& last_request_tag.is_some()
1455-
&& last_request_tag.as_deref() != Some(benchmark_job.request_tag())
1456-
&& needs_git_update(collector)
1457-
{
1468+
if check_git_sha && is_new_request && needs_git_update(collector) {
14581469
log::warn!("Exiting collector to update itself from git.");
14591470
return Ok(());
14601471
}
1472+
14611473
last_request_tag = Some(benchmark_job.request_tag().to_string());
14621474

14631475
log::info!("Dequeued job {benchmark_job:?}, artifact_id {artifact_id:?}");
@@ -1523,6 +1535,23 @@ async fn run_job_queue_benchmarks(
15231535
Ok(())
15241536
}
15251537

1538+
/// Check the toolchain cache directory and delete it if it grows too large.
1539+
/// Currently, we just assume that "too large" means "has more than N toolchains".
1540+
fn tidy_toolchain_cache_dir() -> std::io::Result<()> {
1541+
let dir_count = Path::new(TOOLCHAIN_CACHE_DIRECTORY)
1542+
.read_dir()?
1543+
.filter_map(|e| e.ok())
1544+
.filter_map(|d| d.file_type().ok())
1545+
.filter(|t| t.is_dir())
1546+
.count();
1547+
if dir_count > TOOLCHAIN_CACHE_MAX_TOOLCHAINS {
1548+
log::warn!("Purging toolchain cache directory at {TOOLCHAIN_CACHE_DIRECTORY}");
1549+
// Just remove the whole directory, to avoid having to figure out which toolchains are old
1550+
std::fs::remove_dir_all(TOOLCHAIN_CACHE_DIRECTORY)?;
1551+
}
1552+
Ok(())
1553+
}
1554+
15261555
/// Returns true if the commit SHA of collector does not match the latest commit SHA of the master
15271556
/// branch of https://github.com/rust-lang/rustc-perf.
15281557
fn needs_git_update(collector: &CollectorConfig) -> bool {
@@ -1606,8 +1635,6 @@ async fn run_benchmark_job(
16061635
};
16071636
// Avoid redownloading the same sysroot multiple times for different jobs, even
16081637
// across collector restarts.
1609-
1610-
// TODO: Periodically clear the cache directory to avoid running out of disk space.
16111638
sysroot.preserve();
16121639
Toolchain::from_sysroot(&sysroot, commit.sha.clone())
16131640
}

0 commit comments

Comments
 (0)