Skip to content

Commit b1df1aa

Browse files
committed
vmm: Add backup api
1 parent db6daaa commit b1df1aa

File tree

3 files changed

+144
-3
lines changed

3 files changed

+144
-3
lines changed

vmm/rpc/proto/vmm_rpc.proto

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,25 @@ message GpuInfo {
223223
bool is_free = 4;
224224
}
225225

226+
message BackupDiskRequest {
227+
// vm id
228+
string id = 1;
229+
// full or incremental
230+
string level = 2;
231+
}
232+
233+
message BackupInfo {
234+
// filename (e.g., FULL-1694222400-hd1.img)
235+
string filename = 1;
236+
// size of the backup in bytes
237+
uint64 size = 2;
238+
}
239+
240+
message ListBackupsResponse {
241+
// list of backups
242+
repeated BackupInfo backups = 1;
243+
}
244+
226245
// Service definition for dstack-vmm
227246
service Vmm {
228247
// RPC to create a VM
@@ -261,4 +280,10 @@ service Vmm {
261280

262281
// List GPUs
263282
rpc ListGpus(google.protobuf.Empty) returns (ListGpusResponse);
283+
284+
// Backup a VM data disk
285+
rpc BackupDisk(BackupDiskRequest) returns (google.protobuf.Empty);
286+
287+
// List backups for a VM
288+
rpc ListBackups(Id) returns (ListBackupsResponse);
264289
}

vmm/src/app.rs

Lines changed: 109 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ use dstack_kms_rpc::kms_client::KmsClient;
66
use dstack_types::shared_filenames::{
77
compat_v3, APP_COMPOSE, ENCRYPTED_ENV, INSTANCE_INFO, SYS_CONFIG, USER_CONFIG,
88
};
9-
use dstack_vmm_rpc::{self as pb, GpuInfo, StatusRequest, StatusResponse, VmConfiguration};
9+
use dstack_vmm_rpc::{
10+
self as pb, BackupInfo, GpuInfo, StatusRequest, StatusResponse, VmConfiguration,
11+
};
1012
use fs_err as fs;
1113
use guest_api::client::DefaultClient as GuestClient;
1214
use id_pool::IdPool;
@@ -18,7 +20,7 @@ use std::net::IpAddr;
1820
use std::path::{Path, PathBuf};
1921
use std::sync::{Arc, Mutex, MutexGuard};
2022
use supervisor_client::SupervisorClient;
21-
use tracing::{error, info};
23+
use tracing::{error, info, warn};
2224

2325
pub use image::{Image, ImageInfo};
2426
pub use qemu::{VmConfig, VmWorkDir};
@@ -647,6 +649,111 @@ impl App {
647649
}
648650
Ok(())
649651
}
652+
653+
pub(crate) async fn backup_disk(&self, id: &str, level: &str) -> Result<()> {
654+
let work_dir = self.work_dir(id);
655+
656+
// Determine backup level based on the backup_type
657+
let backup_level = match level {
658+
"full" => "full",
659+
"incremental" => "inc",
660+
_ => bail!("Invalid backup level: {level}"),
661+
};
662+
663+
// Get the VM directory path as a string
664+
let backup_dir = work_dir.path().join("backups");
665+
let qmp_socket = work_dir.qmp_socket().to_string_lossy().to_string();
666+
667+
// Create backup directory if it doesn't exist
668+
tokio::fs::create_dir_all(&backup_dir)
669+
.await
670+
.context("Failed to create backup directory")?;
671+
672+
// Run the qmpbackup command in a blocking thread pool since it takes seconds to complete
673+
tokio::task::spawn_blocking(move || {
674+
let output = std::process::Command::new("qmpbackup")
675+
.arg("--socket")
676+
.arg(qmp_socket)
677+
.arg("backup")
678+
.arg("-i")
679+
.arg("hd1")
680+
.arg("--no-subdir")
681+
.arg("-t")
682+
.arg(&backup_dir)
683+
.arg("-T")
684+
.arg("-l")
685+
.arg(backup_level)
686+
.output();
687+
688+
match output {
689+
Ok(output) => {
690+
if !output.status.success() {
691+
let stderr = String::from_utf8_lossy(&output.stderr);
692+
Err(anyhow::anyhow!("qmpbackup command failed: {}", stderr))
693+
} else {
694+
Ok(())
695+
}
696+
}
697+
Err(e) => Err(anyhow::anyhow!(
698+
"Failed to execute qmpbackup command: {}",
699+
e
700+
)),
701+
}
702+
})
703+
.await
704+
.context("Failed to execute backup task")?
705+
}
706+
707+
pub(crate) async fn list_backups(&self, id: &str) -> Result<Vec<BackupInfo>> {
708+
let work_dir = self.work_dir(id);
709+
let backup_dir = work_dir.path().join("backups");
710+
711+
// Create backup directory if it doesn't exist
712+
if !backup_dir.exists() {
713+
return Ok(Vec::new());
714+
}
715+
716+
// List backup files in the directory
717+
let mut backups = Vec::new();
718+
719+
// Read directory entries in a blocking task
720+
let backup_dir_clone = backup_dir.clone();
721+
let entries =
722+
std::fs::read_dir(backup_dir_clone).context("Failed to read backup directory")?;
723+
// Process each entry
724+
for entry in entries {
725+
let path = match entry {
726+
Ok(entry) => entry.path(),
727+
Err(e) => {
728+
warn!("Failed to read directory entry: {e:?}");
729+
continue;
730+
}
731+
};
732+
// Skip if not a file
733+
if !path.is_file() {
734+
continue;
735+
}
736+
737+
// Get file name
738+
let file_name = match path.file_name().and_then(|n| n.to_str()) {
739+
Some(name) => name.to_string(),
740+
None => continue,
741+
};
742+
743+
if !file_name.ends_with(".img") {
744+
continue;
745+
}
746+
747+
backups.push(BackupInfo {
748+
filename: file_name,
749+
size: path
750+
.metadata()
751+
.context("Failed to get file metadata")?
752+
.len(),
753+
});
754+
}
755+
Ok(backups)
756+
}
650757
}
651758

652759
fn paginate<T>(items: Vec<T>, page: u32, page_size: u32) -> impl Iterator<Item = T> {

vmm/src/main_service.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ use std::time::{SystemTime, UNIX_EPOCH};
33

44
use anyhow::{anyhow, bail, Context, Result};
55
use dstack_types::AppCompose;
6-
use dstack_vmm_rpc as rpc;
76
use dstack_vmm_rpc::vmm_server::{VmmRpc, VmmServer};
7+
use dstack_vmm_rpc::{self as rpc, BackupDiskRequest};
88
use dstack_vmm_rpc::{
99
AppId, ComposeHash as RpcComposeHash, GatewaySettings, GetInfoResponse, GetMetaResponse, Id,
1010
ImageInfo as RpcImageInfo, ImageListResponse, KmsSettings, ListGpusResponse, PublicKeyResponse,
@@ -456,6 +456,15 @@ impl VmmRpc for RpcHandler {
456456
let hash = hex_sha256(&request.compose_file);
457457
Ok(RpcComposeHash { hash })
458458
}
459+
460+
async fn backup_disk(self, request: BackupDiskRequest) -> Result<()> {
461+
self.app.backup_disk(&request.id, &request.level).await
462+
}
463+
464+
async fn list_backups(self, request: Id) -> Result<rpc::ListBackupsResponse> {
465+
let backups = self.app.list_backups(&request.id).await?;
466+
Ok(rpc::ListBackupsResponse { backups })
467+
}
459468
}
460469

461470
impl RpcCall<App> for RpcHandler {

0 commit comments

Comments
 (0)