Skip to content

Commit 670fdb5

Browse files
Deal with forever increasing number of packs when pulling crates.io-index (#258)
1 parent f9b33b1 commit 670fdb5

File tree

3 files changed

+99
-20
lines changed

3 files changed

+99
-20
lines changed

Cargo.lock

Lines changed: 1 addition & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,13 @@ actix-web = "4"
1818
actix-web-lab = "0.24"
1919
anyhow = "1"
2020
crates-index = { version = "3", default-features = false, features = ["git", "git-https-reqwest"] }
21+
# to be kept in sync with the version used by `crates-index`
22+
gix = { version = "0.71", default-features = false }
2123
derive_more = { version = "2", features = ["display", "error", "from"] }
2224
dotenvy = "0.15"
2325
either = "1.12"
2426
font-awesome-as-a-crate = "0.3"
2527
futures-util = { version = "0.3", default-features = false, features = ["std"] }
26-
error_reporter = "1"
2728
indexmap = { version = "2", features = ["serde"] }
2829
lru_time_cache = "0.11"
2930
maud = "0.27"

src/utils/index.rs

Lines changed: 96 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
1-
use std::{sync::Arc, time::Duration};
1+
use std::{fs, sync::Arc, time::Duration};
22

3-
use anyhow::Result;
3+
use anyhow::{Context, Result};
44
use crates_index::{Crate, GitIndex};
55
use parking_lot::Mutex;
66
use tokio::{
@@ -12,19 +12,23 @@ use crate::models::crates::CrateName;
1212

1313
#[derive(Clone)]
1414
pub struct ManagedIndex {
15-
index: Arc<Mutex<GitIndex>>,
15+
index: Arc<Mutex<Option<GitIndex>>>,
1616
}
1717

1818
impl ManagedIndex {
1919
pub fn new() -> Self {
2020
// the index path is configurable through the `CARGO_HOME` env variable
21-
let index = Arc::new(Mutex::new(GitIndex::new_cargo_default().unwrap()));
21+
let index = Arc::new(Mutex::new(Some(GitIndex::new_cargo_default().unwrap())));
2222

2323
Self { index }
2424
}
2525

2626
pub fn crate_(&self, crate_name: CrateName) -> Option<Crate> {
27-
self.index.lock().crate_(crate_name.as_ref())
27+
self.index
28+
.lock()
29+
.as_ref()
30+
.expect("ManagedIndex is poisoned")
31+
.crate_(crate_name.as_ref())
2832
}
2933

3034
pub async fn refresh_at_interval(&self, update_interval: Duration) {
@@ -34,21 +38,101 @@ impl ManagedIndex {
3438
loop {
3539
if let Err(err) = self.refresh().await {
3640
tracing::error!(
37-
"failed refreshing the crates.io-index, the operation will be retried: {}",
38-
error_reporter::Report::new(err),
41+
"failed refreshing the crates.io-index, the operation will be retried: {err:#}"
3942
);
4043
}
4144
update_interval.tick().await;
4245
}
4346
}
4447

45-
async fn refresh(&self) -> Result<(), crates_index::Error> {
46-
let index = Arc::clone(&self.index);
48+
async fn refresh(&self) -> Result<()> {
49+
let this_index = Arc::clone(&self.index);
4750

48-
spawn_blocking(move || index.lock().update())
49-
.await
50-
.expect("blocking index update task should never panic")?;
51+
spawn_blocking(move || {
52+
let mut index = this_index.lock();
53+
let git_index = index.as_mut().context("ManagedIndex is poisoned")?;
54+
55+
match git_index.update() {
56+
Ok(()) => Ok(()),
57+
Err(err) => match current_entries(&err) {
58+
Some(..4096) => {
59+
tracing::info!(
60+
"Reopening crates.io-index to make gix expand the internal slotmap"
61+
);
62+
*git_index = GitIndex::with_path(git_index.path(), git_index.url())
63+
.context("could not reopen git index")?;
64+
git_index
65+
.update()
66+
.context("failed to update crates.io-index after `git gc`")
67+
}
68+
Some(4096..) => {
69+
tracing::info!(
70+
"Cloning a new crates.io-index and replacing it with the current one"
71+
);
72+
let path = git_index.path().to_owned();
73+
let url = git_index.url().to_owned();
74+
75+
// Avoid keeping the index locked for too long
76+
drop(index);
77+
78+
// Clone the new index
79+
let mut tmp_path = path.clone();
80+
tmp_path.as_mut_os_string().push(".new");
81+
if tmp_path.try_exists()? {
82+
fs::remove_dir_all(&tmp_path)?;
83+
}
84+
let new_index = GitIndex::with_path(&tmp_path, &url)
85+
.context("could not clone new git index")?;
86+
87+
// Swap the old index with the new one
88+
drop(new_index);
89+
90+
let mut index = this_index.lock();
91+
*index = None;
92+
// NOTE: if any of the following operations fail,
93+
// the index is poisoned
94+
fs::remove_dir_all(&path)?;
95+
fs::rename(tmp_path, &path)?;
96+
97+
*index = Some(
98+
GitIndex::with_path(path, url).context("could not reopen git index")?,
99+
);
100+
Ok(())
101+
}
102+
None => {
103+
Err(anyhow::Error::from(err).context("failed to update crates.io-index"))
104+
}
105+
},
106+
}
107+
})
108+
.await
109+
.expect("blocking index update task should never panic")?;
51110

52111
Ok(())
53112
}
54113
}
114+
115+
fn current_entries(err: &crates_index::Error) -> Option<usize> {
116+
let crates_index::Error::Git(err) = err else {
117+
return None;
118+
};
119+
let crates_index::error::GixError::Fetch(err) = err else {
120+
return None;
121+
};
122+
let gix::remote::fetch::Error::UpdateRefs(err) = err else {
123+
return None;
124+
};
125+
let gix::remote::fetch::refs::update::Error::FindObject(gix::object::find::Error(err)) = err
126+
else {
127+
return None;
128+
};
129+
let err = err.downcast_ref::<gix::odb::store::find::Error>()?;
130+
let gix::odb::store::find::Error::LoadIndex(err) = err else {
131+
return None;
132+
};
133+
let gix::odb::store::load_index::Error::InsufficientSlots { current, needed } = err else {
134+
return None;
135+
};
136+
137+
Some(*current + *needed)
138+
}

0 commit comments

Comments
 (0)