Skip to content

Commit 6e416b7

Browse files
generall and agourlay authored
refactor current workdir backup shenanigans (#21)
* refactor current workdir backup shenanigans
* try fixing CI
* enable for test
* propagate change
* always backup storage on CI

---------

Co-authored-by: Arnaud Gourlay <arnaud.gourlay@gmail.com>
1 parent 4b74627 commit 6e416b7

File tree

6 files changed

+33
-39
lines changed

6 files changed

+33
-39
lines changed

.github/workflows/run-crasher.yml

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: Crash
22

33
on:
44
push:
5-
branches: [ "master" ]
5+
branches: [ "*" ]
66
schedule:
77
# every day at 12 CET
88
- cron: "0 14 * * *"
@@ -14,10 +14,6 @@ on:
1414
crashing_time:
1515
description: "Duration in seconds, default is 3600s (1h)"
1616
default: 3600
17-
backup_working_dir:
18-
description: "Backup working dir before each Qdrant restart"
19-
type: boolean
20-
default: false
2117

2218
env:
2319
CARGO_TERM_COLOR: always
@@ -89,9 +85,9 @@ jobs:
8985
cp -r qdrant-src/config qdrant
9086
9187
crashing_time="${{ steps.default_inputs.outputs.crashing_time }}"
92-
backup_working_dir="${{ inputs.backup_working_dir && 'qdrant-backup' }}"
88+
backup_storage_dir="${{ format('{0}/qdrant/storage-backup', github.workspace) }}"
9389
94-
./crash-things.sh qdrant ../qdrant-src/target/debug/qdrant 0.3 "$crashing_time" ${backup_working_dir:+"$backup_working_dir"}
90+
./crash-things.sh qdrant ../qdrant-src/target/debug/qdrant 0.3 "$crashing_time" "$backup_storage_dir"
9591
- name: Upload logs on failure
9692
uses: actions/upload-artifact@v4
9793
if: failure() || cancelled()
@@ -109,7 +105,6 @@ jobs:
109105
retention-days: 10
110106
path: |
111107
qdrant/
112-
qdrant-backup/
113108
- name: Send Notification
114109
if: failure() || cancelled()
115110
uses: slackapi/slack-github-action@v1.26.0

crash-things.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ QDRANT_DIR=${1:-./qdrant/}
66
QDRANT_EXEC=${2:-target/debug/qdrant}
77
CRASH_PROBABILITY=${3:-0.3}
88
RUN_TIME=${4:-300}
9-
QDRANT_BACKUP_DIRS=( "${@:5}" )
9+
QDRANT_BACKUP_DIR=${5:?backup directory is required}
1010

1111
CRASHER_LOG=crasher.log
1212
QDRANT_LOG=../qdrant.log
@@ -15,7 +15,7 @@ CRASHER_CMD=(
1515
cargo run --release
1616
--
1717
--working-dir "$QDRANT_DIR"
18-
${QDRANT_BACKUP_DIRS[@]/#/--backup-working-dir } # this does not handle spaces 😬
18+
--storage-backup $QDRANT_BACKUP_DIR
1919
--exec-path "$QDRANT_EXEC"
2020
--crash-probability "$CRASH_PROBABILITY"
2121
)

readme.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ Usage: crasher [OPTIONS] --working-dir <WORKING_DIR> --exec-path <EXEC_PATH>
1717
Options:
1818
--working-dir <WORKING_DIR>
1919
Working directory for Qdrant data
20-
--backup-working-dir <BACKUP_WORKING_DIR>
21-
Backup working directory between Qdrant restarts (useful to debug storage recovery issues)
20+
--storage-backup <STORAGE_BACKUP>
21+
Backup `storage` directory from `working_dir` between Qdrant restarts (useful to debug storage recovery issues)
2222
--exec-path <EXEC_PATH>
2323
Path to executable binary relative to `working_dir`
2424
--crash-probability <CRASH_PROBABILITY>

src/args.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ pub struct Args {
77
/// Working directory for Qdrant data
88
#[arg(long)]
99
pub working_dir: String,
10-
/// Backup working directory between Qdrant restarts (useful to debug storage recovery issues)
10+
/// Backup `storage` directory from `working_dir` between Qdrant restarts (useful to debug storage recovery issues)
1111
#[arg(long)]
12-
pub backup_working_dir: Vec<String>,
12+
pub storage_backup: Option<String>,
1313
/// Path to executable binary relative to `working_dir`
1414
#[arg(long)]
1515
pub exec_path: String,

src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ async fn main() {
7777
let (rng_seed, mut workload_rng, mut chaos_rng) = create_rngs(args.rng_seed);
7878

7979
// workload task
80-
let workload = Workload::new(
80+
let workload: Workload = Workload::new(
8181
collection_name,
8282
stopped.clone(),
8383
crash_lock.clone(),

src/process.rs

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ use crate::util;
44
use anyhow::Context as _;
55
use qdrant_client::Qdrant;
66
use rand::Rng;
7-
use std::collections::VecDeque;
87
use std::io;
8+
use std::path::PathBuf;
99
use std::process::exit;
1010
use std::sync::Arc;
1111
use std::sync::atomic::{AtomicBool, Ordering};
@@ -50,21 +50,24 @@ pub fn start_process(
5050
pub struct ProcessManager {
5151
pub working_dir: String,
5252
pub binary_path: String,
53-
pub backup_dirs: VecDeque<String>,
53+
pub backup_dir: Option<String>,
5454
pub child_process: Child,
5555
pub kill_on_drop: bool,
5656
pub cpu_quota: Option<u32>,
5757
}
5858

5959
impl ProcessManager {
6060
pub fn from_args(args: &Args) -> io::Result<Self> {
61-
let manager = Self::new(
61+
let mut manager = Self::new(
6262
&args.working_dir,
6363
&args.exec_path,
6464
args.shutdown_on_error,
6565
args.cpu_quota,
66-
)?
67-
.with_backup_dirs(args.backup_working_dir.clone());
66+
)?;
67+
68+
if let Some(storage_backup) = &args.storage_backup {
69+
manager = manager.with_backup_dirs(storage_backup);
70+
}
6871

6972
Ok(manager)
7073
}
@@ -80,15 +83,15 @@ impl ProcessManager {
8083
Ok(Self {
8184
working_dir: working_dir.to_string(),
8285
binary_path: binary_path.to_string(),
83-
backup_dirs: VecDeque::new(),
86+
backup_dir: None,
8487
child_process: child,
8588
kill_on_drop,
8689
cpu_quota,
8790
})
8891
}
8992

90-
pub fn with_backup_dirs(mut self, backup_dirs: impl Into<VecDeque<String>>) -> Self {
91-
self.backup_dirs = backup_dirs.into();
93+
pub fn with_backup_dirs(mut self, backup_dir: impl Into<String>) -> Self {
94+
self.backup_dir = Some(backup_dir.into());
9295
self
9396
}
9497

@@ -97,25 +100,27 @@ impl ProcessManager {
97100
self.child_process.kill().await.unwrap();
98101
}
99102

100-
pub async fn backup_working_dir(&mut self) -> anyhow::Result<()> {
101-
let Some(backup_dir) = self.backup_dirs.front() else {
103+
pub async fn backup_storage_dir(&mut self) -> anyhow::Result<()> {
104+
let Some(backup_dir) = &self.backup_dir else {
102105
return Ok(());
103106
};
104107

105108
let backup_exists = fs::try_exists(backup_dir).await.with_context(|| {
106-
format!("failed to query if backup working dir {backup_dir} exists")
109+
format!("failed to query if backup storage dir {backup_dir} exists")
107110
})?;
108111

112+
let backup_dir = PathBuf::from(backup_dir);
113+
let backup_dir_path = backup_dir.as_path();
114+
115+
let source_storage_dir = PathBuf::from(&self.working_dir).join("storage");
116+
109117
if backup_exists {
110-
fs::remove_dir_all(backup_dir)
118+
fs::remove_dir_all(backup_dir_path)
111119
.await
112-
.with_context(|| format!("failed to remove backup working dir {backup_dir}"))?;
120+
.with_context(|| format!("failed to remove backup storage dir {backup_dir_path:?}"))?;
113121
}
114122

115-
util::copy_dir(&self.working_dir, backup_dir).await?;
116-
117-
let backup_dir = self.backup_dirs.pop_front().expect("backup dir");
118-
self.backup_dirs.push_back(backup_dir);
123+
util::copy_dir(&source_storage_dir, backup_dir_path).await?;
119124

120125
Ok(())
121126
}
@@ -141,13 +146,7 @@ impl ProcessManager {
141146
log::info!("** Restarting qdrant **");
142147
self.kill_process().await;
143148

144-
if let Err(err) = self.backup_working_dir().await {
145-
log::error!(
146-
"Failed to backup working dir {} to {}: {err:?}",
147-
self.working_dir,
148-
self.backup_dirs.front().expect("backup dir"),
149-
);
150-
}
149+
self.backup_storage_dir().await.unwrap();
151150

152151
self.child_process = start_process(
153152
&self.working_dir,

0 commit comments

Comments
 (0)