Skip to content

Commit 9f9a94a

Browse files
committed
worker/jobs/index: Extract sync jobs into dedicated module
1 parent 77befd4 commit 9f9a94a

File tree

2 files changed

+164
-162
lines changed

2 files changed

+164
-162
lines changed

src/worker/jobs/index/mod.rs

Lines changed: 2 additions & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -1,167 +1,7 @@
1-
use crate::models;
2-
use crate::tasks::spawn_blocking;
3-
use crate::util::diesel::Conn;
4-
use crate::worker::Environment;
5-
use anyhow::Context;
6-
use crates_io_index::Repository;
7-
use crates_io_worker::BackgroundJob;
8-
use diesel::prelude::*;
9-
use diesel_async::async_connection_wrapper::AsyncConnectionWrapper;
10-
use sentry::Level;
11-
use std::fs::{self, File};
12-
use std::io::{ErrorKind, Write};
13-
use std::sync::Arc;
14-
151
mod normalize;
162
mod squash;
3+
mod sync;
174

185
pub use normalize::NormalizeIndex;
196
pub use squash::SquashIndex;
20-
21-
#[derive(Serialize, Deserialize)]
22-
pub struct SyncToGitIndex {
23-
krate: String,
24-
}
25-
26-
impl SyncToGitIndex {
27-
pub fn new(krate: impl Into<String>) -> Self {
28-
let krate = krate.into();
29-
Self { krate }
30-
}
31-
}
32-
33-
impl BackgroundJob for SyncToGitIndex {
34-
const JOB_NAME: &'static str = "sync_to_git_index";
35-
const PRIORITY: i16 = 100;
36-
const QUEUE: &'static str = "repository";
37-
38-
type Context = Arc<Environment>;
39-
40-
/// Regenerates or removes an index file for a single crate
41-
#[instrument(skip_all, fields(krate.name = ? self.krate))]
42-
async fn run(&self, env: Self::Context) -> anyhow::Result<()> {
43-
info!("Syncing to git index");
44-
45-
let crate_name = self.krate.clone();
46-
let conn = env.deadpool.get().await?;
47-
spawn_blocking(move || {
48-
let conn: &mut AsyncConnectionWrapper<_> = &mut conn.into();
49-
50-
let new = get_index_data(&crate_name, conn).context("Failed to get index data")?;
51-
52-
let repo = env.lock_index()?;
53-
let dst = repo.index_file(&crate_name);
54-
55-
// Read the previous crate contents
56-
let old = match fs::read_to_string(&dst) {
57-
Ok(content) => Some(content),
58-
Err(error) if error.kind() == ErrorKind::NotFound => None,
59-
Err(error) => return Err(error.into()),
60-
};
61-
62-
match (old, new) {
63-
(None, Some(new)) => {
64-
fs::create_dir_all(dst.parent().unwrap())?;
65-
let mut file = File::create(&dst)?;
66-
file.write_all(new.as_bytes())?;
67-
repo.commit_and_push(&format!("Create crate `{}`", &crate_name), &dst)?;
68-
}
69-
(Some(old), Some(new)) if old != new => {
70-
let mut file = File::create(&dst)?;
71-
file.write_all(new.as_bytes())?;
72-
repo.commit_and_push(&format!("Update crate `{}`", &crate_name), &dst)?;
73-
}
74-
(Some(_old), None) => {
75-
fs::remove_file(&dst)?;
76-
repo.commit_and_push(&format!("Delete crate `{}`", &crate_name), &dst)?;
77-
}
78-
_ => debug!("Skipping sync because index is up-to-date"),
79-
}
80-
81-
Ok(())
82-
})
83-
.await
84-
}
85-
}
86-
87-
#[derive(Serialize, Deserialize)]
88-
pub struct SyncToSparseIndex {
89-
krate: String,
90-
}
91-
92-
impl SyncToSparseIndex {
93-
pub fn new(krate: impl Into<String>) -> Self {
94-
let krate = krate.into();
95-
Self { krate }
96-
}
97-
}
98-
99-
impl BackgroundJob for SyncToSparseIndex {
100-
const JOB_NAME: &'static str = "sync_to_sparse_index";
101-
const PRIORITY: i16 = 100;
102-
103-
type Context = Arc<Environment>;
104-
105-
/// Regenerates or removes an index file for a single crate
106-
#[instrument(skip_all, fields(krate.name = ?self.krate))]
107-
async fn run(&self, env: Self::Context) -> anyhow::Result<()> {
108-
info!("Syncing to sparse index");
109-
110-
let crate_name = self.krate.clone();
111-
let conn = env.deadpool.get().await?;
112-
let content = spawn_blocking(move || {
113-
let conn: &mut AsyncConnectionWrapper<_> = &mut conn.into();
114-
get_index_data(&crate_name, conn)
115-
})
116-
.await
117-
.context("Failed to get index data")?;
118-
119-
let future = env.storage.sync_index(&self.krate, content);
120-
future.await.context("Failed to sync index data")?;
121-
122-
if let Some(cloudfront) = env.cloudfront() {
123-
let path = Repository::relative_index_file_for_url(&self.krate);
124-
125-
info!(%path, "Invalidating index file on CloudFront");
126-
let future = cloudfront.invalidate(&path);
127-
future.await.context("Failed to invalidate CloudFront")?;
128-
}
129-
Ok(())
130-
}
131-
}
132-
133-
#[instrument(skip_all, fields(krate.name = ?name))]
134-
pub fn get_index_data(name: &str, conn: &mut impl Conn) -> anyhow::Result<Option<String>> {
135-
debug!("Looking up crate by name");
136-
let Some(krate): Option<models::Crate> =
137-
models::Crate::by_exact_name(name).first(conn).optional()?
138-
else {
139-
return Ok(None);
140-
};
141-
142-
debug!("Gathering remaining index data");
143-
let crates = krate
144-
.index_metadata(conn)
145-
.context("Failed to gather index metadata")?;
146-
147-
// This can sometimes happen when we delete versions upon owner request
148-
// but don't realize that the crate is now left with no versions at all.
149-
//
150-
// In this case we will delete the crate from the index and log a warning to
151-
// Sentry to clean this up in the database.
152-
if crates.is_empty() {
153-
let message = format!("Crate `{name}` has no versions left");
154-
sentry::capture_message(&message, Level::Warning);
155-
156-
return Ok(None);
157-
}
158-
159-
debug!("Serializing index data");
160-
let mut bytes = Vec::new();
161-
crates_io_index::write_crates(&crates, &mut bytes)
162-
.context("Failed to serialize index metadata")?;
163-
164-
let str = String::from_utf8(bytes).context("Failed to decode index metadata as utf8")?;
165-
166-
Ok(Some(str))
167-
}
7+
pub use sync::{SyncToGitIndex, SyncToSparseIndex};

src/worker/jobs/index/sync.rs

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
use crate::models;
2+
use crate::tasks::spawn_blocking;
3+
use crate::util::diesel::Conn;
4+
use crate::worker::Environment;
5+
use anyhow::Context;
6+
use crates_io_index::Repository;
7+
use crates_io_worker::BackgroundJob;
8+
use diesel::{OptionalExtension, RunQueryDsl};
9+
use diesel_async::async_connection_wrapper::AsyncConnectionWrapper;
10+
use sentry::Level;
11+
use std::fs;
12+
use std::fs::File;
13+
use std::io::{ErrorKind, Write};
14+
use std::sync::Arc;
15+
16+
#[derive(Serialize, Deserialize)]
17+
pub struct SyncToGitIndex {
18+
krate: String,
19+
}
20+
21+
impl SyncToGitIndex {
22+
pub fn new(krate: impl Into<String>) -> Self {
23+
let krate = krate.into();
24+
Self { krate }
25+
}
26+
}
27+
28+
impl BackgroundJob for SyncToGitIndex {
29+
const JOB_NAME: &'static str = "sync_to_git_index";
30+
const PRIORITY: i16 = 100;
31+
const QUEUE: &'static str = "repository";
32+
33+
type Context = Arc<Environment>;
34+
35+
/// Regenerates or removes an index file for a single crate
36+
#[instrument(skip_all, fields(krate.name = ? self.krate))]
37+
async fn run(&self, env: Self::Context) -> anyhow::Result<()> {
38+
info!("Syncing to git index");
39+
40+
let crate_name = self.krate.clone();
41+
let conn = env.deadpool.get().await?;
42+
spawn_blocking(move || {
43+
let conn: &mut AsyncConnectionWrapper<_> = &mut conn.into();
44+
45+
let new = get_index_data(&crate_name, conn).context("Failed to get index data")?;
46+
47+
let repo = env.lock_index()?;
48+
let dst = repo.index_file(&crate_name);
49+
50+
// Read the previous crate contents
51+
let old = match fs::read_to_string(&dst) {
52+
Ok(content) => Some(content),
53+
Err(error) if error.kind() == ErrorKind::NotFound => None,
54+
Err(error) => return Err(error.into()),
55+
};
56+
57+
match (old, new) {
58+
(None, Some(new)) => {
59+
fs::create_dir_all(dst.parent().unwrap())?;
60+
let mut file = File::create(&dst)?;
61+
file.write_all(new.as_bytes())?;
62+
repo.commit_and_push(&format!("Create crate `{}`", &crate_name), &dst)?;
63+
}
64+
(Some(old), Some(new)) if old != new => {
65+
let mut file = File::create(&dst)?;
66+
file.write_all(new.as_bytes())?;
67+
repo.commit_and_push(&format!("Update crate `{}`", &crate_name), &dst)?;
68+
}
69+
(Some(_old), None) => {
70+
fs::remove_file(&dst)?;
71+
repo.commit_and_push(&format!("Delete crate `{}`", &crate_name), &dst)?;
72+
}
73+
_ => debug!("Skipping sync because index is up-to-date"),
74+
}
75+
76+
Ok(())
77+
})
78+
.await
79+
}
80+
}
81+
82+
#[derive(Serialize, Deserialize)]
83+
pub struct SyncToSparseIndex {
84+
krate: String,
85+
}
86+
87+
impl SyncToSparseIndex {
88+
pub fn new(krate: impl Into<String>) -> Self {
89+
let krate = krate.into();
90+
Self { krate }
91+
}
92+
}
93+
94+
impl BackgroundJob for SyncToSparseIndex {
95+
const JOB_NAME: &'static str = "sync_to_sparse_index";
96+
const PRIORITY: i16 = 100;
97+
98+
type Context = Arc<Environment>;
99+
100+
/// Regenerates or removes an index file for a single crate
101+
#[instrument(skip_all, fields(krate.name = ?self.krate))]
102+
async fn run(&self, env: Self::Context) -> anyhow::Result<()> {
103+
info!("Syncing to sparse index");
104+
105+
let crate_name = self.krate.clone();
106+
let conn = env.deadpool.get().await?;
107+
let content = spawn_blocking(move || {
108+
let conn: &mut AsyncConnectionWrapper<_> = &mut conn.into();
109+
get_index_data(&crate_name, conn)
110+
})
111+
.await
112+
.context("Failed to get index data")?;
113+
114+
let future = env.storage.sync_index(&self.krate, content);
115+
future.await.context("Failed to sync index data")?;
116+
117+
if let Some(cloudfront) = env.cloudfront() {
118+
let path = Repository::relative_index_file_for_url(&self.krate);
119+
120+
info!(%path, "Invalidating index file on CloudFront");
121+
let future = cloudfront.invalidate(&path);
122+
future.await.context("Failed to invalidate CloudFront")?;
123+
}
124+
Ok(())
125+
}
126+
}
127+
128+
#[instrument(skip_all, fields(krate.name = ?name))]
129+
fn get_index_data(name: &str, conn: &mut impl Conn) -> anyhow::Result<Option<String>> {
130+
debug!("Looking up crate by name");
131+
let Some(krate): Option<models::Crate> =
132+
models::Crate::by_exact_name(name).first(conn).optional()?
133+
else {
134+
return Ok(None);
135+
};
136+
137+
debug!("Gathering remaining index data");
138+
let crates = krate
139+
.index_metadata(conn)
140+
.context("Failed to gather index metadata")?;
141+
142+
// This can sometimes happen when we delete versions upon owner request
143+
// but don't realize that the crate is now left with no versions at all.
144+
//
145+
// In this case we will delete the crate from the index and log a warning to
146+
// Sentry to clean this up in the database.
147+
if crates.is_empty() {
148+
let message = format!("Crate `{name}` has no versions left");
149+
sentry::capture_message(&message, Level::Warning);
150+
151+
return Ok(None);
152+
}
153+
154+
debug!("Serializing index data");
155+
let mut bytes = Vec::new();
156+
crates_io_index::write_crates(&crates, &mut bytes)
157+
.context("Failed to serialize index metadata")?;
158+
159+
let str = String::from_utf8(bytes).context("Failed to decode index metadata as utf8")?;
160+
161+
Ok(Some(str))
162+
}

0 commit comments

Comments
 (0)