Skip to content

Commit 7e033f7

Browse files
committed
vmm: add migration-progress HTTP endpoint
This prepares the HTTP endpoint to export ongoing VM live-migration progress. This works because of the following fundamental prerequisites: - internal API was made async - http thread was made async This way, one can send requests to fetch the latest state without blocking anywhere. Signed-off-by: Philipp Schuster <[email protected]> On-behalf-of: SAP [email protected]
1 parent 14c7583 commit 7e033f7

File tree

6 files changed

+441
-6
lines changed

6 files changed

+441
-6
lines changed

vm-migration/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use thiserror::Error;
1010
use crate::protocol::MemoryRangeTable;
1111

1212
mod bitpos_iterator;
13+
pub mod progress;
1314
pub mod protocol;
1415
pub mod tls;
1516

vm-migration/src/progress.rs

Lines changed: 346 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,346 @@
1+
// Copyright © 2025 Cyberus Technology GmbH
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
//! Module for reporting of the live-migration progress.
6+
//!
7+
//! The main export is [`MigrationProgressAndStatus`].
8+
9+
use std::error::Error;
10+
use std::num::NonZeroU32;
11+
use std::time::{Duration, SystemTime, UNIX_EPOCH};
12+
13+
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
14+
pub enum TransportationMode {
15+
Local,
16+
Tcp { connections: NonZeroU32, tls: bool },
17+
}
18+
19+
/// Carries information about the transmission of the VM's memory.
20+
#[derive(Clone, Copy, Debug, Default, serde::Serialize, serde::Deserialize)]
21+
pub struct MemoryTransmissionInfo {
22+
/// The memory iteration (only in precopy mode).
23+
pub memory_iteration: u64,
24+
/// Memory bytes per second.
25+
pub memory_transmission_bps: u64,
26+
/// The total size of the VMs memory in bytes.
27+
pub memory_bytes_total: u64,
28+
/// The total size of transmitted bytes.
29+
pub memory_bytes_transmitted: u64,
30+
/// The amount of remaining bytes for this iteration.
31+
pub memory_bytes_remaining_iteration: u64,
32+
/// The amount of transmitted 4k pages.
33+
pub memory_pages_4k_transmitted: u64,
34+
/// The amount of remaining 4k pages for this iteration.
35+
pub memory_pages_4k_remaining_iteration: u64,
36+
/// The amount of zero pages for that we could take a shortcut
37+
/// as all bytes have on fixed value (e.g., a zero page).
38+
pub memory_pages_constant_count: u64,
39+
/// Current memory dirty rate in pages per seconds.
40+
pub memory_dirty_rate_pps: u64,
41+
}
42+
43+
/// The different phases of an ongoing migration (good case).
44+
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
45+
pub enum MigrationPhase {
46+
/// The migration starts. Handshake and transfer of VM config.
47+
Starting,
48+
/// Transfer of memory FDs.
49+
///
50+
/// Only used for local migrations.
51+
MemoryFds,
52+
/// Transfer of VM memory in precopy mode.
53+
///
54+
/// Not used for local migrations.
55+
MemoryPrecopy,
56+
/*/// Transfer of VM memory in postcopy mode.
57+
///
58+
/// This follows after a precopy phase.
59+
///
60+
/// Not used for local migrations.
61+
MemoryPostcopy,*/
62+
/// The VM migration is completing. This means the last chunks of memory
63+
/// are transmitted as well as the final VM state (vCPUs, devices).
64+
Completing,
65+
}
66+
67+
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
68+
pub enum MigrationProgressState {
69+
/// The migration has been cancelled.
70+
Cancelled {
71+
/// The latest memory transmission info, if any.
72+
memory_transmission_info: MemoryTransmissionInfo,
73+
},
74+
/// The migration has failed.
75+
Failed {
76+
/// The last memory transmission info, if any.
77+
memory_transmission_info: MemoryTransmissionInfo,
78+
/// Stringified error.
79+
error_msg: String,
80+
/// Debug-stringified error.
81+
error_msg_debug: String,
82+
// TODO this is very tricky because I need clone()
83+
// error: Box<dyn Error>,
84+
},
85+
/// The migration has finished successfully.
86+
Finished {
87+
/// The last memory transmission info, if any.
88+
memory_transmission_info: MemoryTransmissionInfo,
89+
},
90+
/// The migration is ongoing.
91+
Ongoing {
92+
phase: MigrationPhase,
93+
memory_transmission_info: MemoryTransmissionInfo,
94+
/// Percent in range `0..=100`.
95+
vcpu_throttle_percent: u8,
96+
},
97+
}
98+
99+
impl MigrationProgressState {
100+
fn memory_transmission_info(&self) -> MemoryTransmissionInfo {
101+
match self {
102+
MigrationProgressState::Cancelled {
103+
memory_transmission_info,
104+
..
105+
} => *memory_transmission_info,
106+
MigrationProgressState::Failed {
107+
memory_transmission_info,
108+
..
109+
} => *memory_transmission_info,
110+
MigrationProgressState::Finished {
111+
memory_transmission_info,
112+
..
113+
} => *memory_transmission_info,
114+
MigrationProgressState::Ongoing {
115+
memory_transmission_info,
116+
..
117+
} => *memory_transmission_info,
118+
}
119+
}
120+
121+
fn state_name(&self) -> &'static str {
122+
match self {
123+
MigrationProgressState::Cancelled { .. } => "cancelled",
124+
MigrationProgressState::Failed { .. } => "failed",
125+
MigrationProgressState::Finished { .. } => "finished",
126+
MigrationProgressState::Ongoing { .. } => "ongoing",
127+
}
128+
}
129+
130+
fn cpu_throttle_percent(&self) -> Option<u8> {
131+
match self {
132+
MigrationProgressState::Ongoing {
133+
vcpu_throttle_percent,
134+
..
135+
} => Some(*vcpu_throttle_percent),
136+
_ => None,
137+
}
138+
}
139+
}
140+
141+
/// Returns the current UNIX timestamp in ms.
///
/// This function never panics: if the system clock is set to a point in time
/// before the UNIX epoch, `0` is returned, and a millisecond count that does
/// not fit into a `u64` saturates to `u64::MAX`.
fn current_unix_timestamp_ms() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        // `Err` means the clock is before the epoch; report the epoch itself
        // rather than bringing down the caller.
        .map_or(0, |elapsed| {
            u64::try_from(elapsed.as_millis()).unwrap_or(u64::MAX)
        })
}
148+
149+
/// Type holding a current snapshot about the progress and status information
150+
/// of an ongoing live migration.
151+
///
152+
/// The states correspond to the [live-migration protocol]. This type was
153+
/// specifically crafted with easy yet clear semantics for API users in mind.
154+
///
155+
/// [live-migration protocol]: super::protocol
156+
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
157+
pub struct MigrationProgressAndStatus {
158+
/// UNIX timestamp of the start of the live-migration process.
159+
pub timestamp_begin: u64,
160+
/// UNIX timestamp of the current snapshot.
161+
pub timestamp_snapshot: u64,
162+
/// Configured target downtime.
163+
pub downtime_ms_target: u64,
164+
/// The currently expected downtime.
165+
pub downtime_ms_expected: Option<u64>,
166+
/// The requested transportation mode.
167+
pub transportation_mode: TransportationMode,
168+
/// Snapshot of the current phase.
169+
pub state: MigrationProgressState,
170+
}
171+
172+
impl MigrationProgressAndStatus {
173+
pub fn new(transportation_mode: TransportationMode, target_downtime: Duration) -> Self {
174+
let timestamp = current_unix_timestamp_ms();
175+
Self {
176+
timestamp_begin: timestamp,
177+
timestamp_snapshot: timestamp,
178+
downtime_ms_target: target_downtime.as_millis() as u64,
179+
downtime_ms_expected: None,
180+
transportation_mode,
181+
state: MigrationProgressState::Ongoing {
182+
phase: MigrationPhase::Starting,
183+
memory_transmission_info: MemoryTransmissionInfo::default(),
184+
vcpu_throttle_percent: 42,
185+
},
186+
}
187+
}
188+
189+
/// Updates the state of an ongoing migration.
190+
pub fn update_ongoing_migration_state(
191+
&mut self,
192+
phase: MigrationPhase,
193+
latest_memory_transmission_info: Option<MemoryTransmissionInfo>,
194+
latest_cpu_throttle_percent: Option<u8>,
195+
) {
196+
if !matches!(self.state, MigrationProgressState::Ongoing { .. }) {
197+
panic!(
198+
"illegal state transition: {} -> ongoing",
199+
self.state.state_name()
200+
);
201+
}
202+
203+
if let Some(cpu_throttle_percent) = latest_cpu_throttle_percent {
204+
assert!(cpu_throttle_percent <= 100);
205+
}
206+
207+
self.state = MigrationProgressState::Ongoing {
208+
phase,
209+
memory_transmission_info: latest_memory_transmission_info
210+
.unwrap_or_else(|| self.state.memory_transmission_info()),
211+
vcpu_throttle_percent: latest_cpu_throttle_percent
212+
.or_else(|| self.state.cpu_throttle_percent())
213+
.unwrap_or(0),
214+
};
215+
}
216+
217+
/// Sets the underlying state to [`MigrationProgressState::Cancelled`] and
218+
/// updates all corresponding metadata.
219+
///
220+
/// After this state change, the object is supposed to be handled as immutable.
221+
pub fn mark_as_cancelled(&mut self) {
222+
if !matches!(self.state, MigrationProgressState::Ongoing { .. }) {
223+
panic!(
224+
"illegal state transition: {} -> cancelled",
225+
self.state.state_name()
226+
);
227+
}
228+
self.timestamp_snapshot = current_unix_timestamp_ms();
229+
self.timestamp_snapshot = current_unix_timestamp_ms();
230+
self.state = MigrationProgressState::Cancelled {
231+
memory_transmission_info: self.state.memory_transmission_info(),
232+
};
233+
}
234+
235+
/// Sets the underlying state to [`MigrationProgressState::Failed`] and
236+
/// updates all corresponding metadata.
237+
///
238+
/// After this state change, the object is supposed to be handled as immutable.
239+
pub fn mark_as_failed(&mut self, error: &dyn Error) {
240+
if !matches!(self.state, MigrationProgressState::Ongoing { .. }) {
241+
panic!(
242+
"illegal state transition: {} -> failed",
243+
self.state.state_name()
244+
);
245+
}
246+
self.timestamp_snapshot = current_unix_timestamp_ms();
247+
self.state = MigrationProgressState::Failed {
248+
memory_transmission_info: self.state.memory_transmission_info(),
249+
error_msg: format!("{error}",),
250+
error_msg_debug: format!("{error:?}",),
251+
};
252+
}
253+
254+
/// Sets the underlying state to [`MigrationProgressState::Finished`] and
255+
/// updates all corresponding metadata.
256+
///
257+
/// After this state change, the object is supposed to be handled as immutable.
258+
pub fn mark_as_finished(&mut self) {
259+
if !matches!(self.state, MigrationProgressState::Ongoing { .. }) {
260+
panic!(
261+
"illegal state transition: {} -> finished",
262+
self.state.state_name()
263+
);
264+
}
265+
self.timestamp_snapshot = current_unix_timestamp_ms();
266+
self.state = MigrationProgressState::Finished {
267+
memory_transmission_info: self.state.memory_transmission_info(),
268+
};
269+
}
270+
}
271+
272+
#[cfg(test)]
mod tests {
    use anyhow::anyhow;

    use super::*;

    /// Walks through all state transitions, checks that each lands in the
    /// expected state with the expected carried-over values, and prints the
    /// JSON representation (helpful to see what the API will look like).
    #[test]
    fn print_json() {
        let starting = MigrationProgressAndStatus::new(
            TransportationMode::Tcp {
                connections: NonZeroU32::new(1).unwrap(),
                tls: false,
            },
            Duration::from_millis(100),
        );
        assert_eq!(starting.downtime_ms_target, 100);
        assert_eq!(starting.timestamp_begin, starting.timestamp_snapshot);
        assert!(matches!(
            starting.state,
            MigrationProgressState::Ongoing {
                phase: MigrationPhase::Starting,
                ..
            }
        ));

        let memory_precopy = {
            let mut state = starting.clone();
            state.update_ongoing_migration_state(
                MigrationPhase::MemoryPrecopy,
                Some(MemoryTransmissionInfo {
                    memory_iteration: 7,
                    memory_transmission_bps: 0,
                    memory_bytes_total: 0x1337,
                    memory_bytes_transmitted: 0x1337,
                    memory_pages_4k_transmitted: 42,
                    memory_pages_4k_remaining_iteration: 42,
                    memory_bytes_remaining_iteration: 124,
                    memory_dirty_rate_pps: 42,
                    memory_pages_constant_count: 0,
                }),
                Some(42),
            );
            state
        };
        assert!(matches!(
            &memory_precopy.state,
            MigrationProgressState::Ongoing {
                phase: MigrationPhase::MemoryPrecopy,
                memory_transmission_info,
                vcpu_throttle_percent: 42,
            } if memory_transmission_info.memory_iteration == 7
        ));

        let completing = {
            let mut state = memory_precopy.clone();
            state.update_ongoing_migration_state(MigrationPhase::Completing, None, Some(99));
            state
        };
        // `None` must keep the previously reported memory transmission info.
        assert!(matches!(
            &completing.state,
            MigrationProgressState::Ongoing {
                phase: MigrationPhase::Completing,
                memory_transmission_info,
                vcpu_throttle_percent: 99,
            } if memory_transmission_info.memory_iteration == 7
        ));

        let completed = {
            let mut state = completing.clone();
            state.mark_as_finished();
            state
        };
        assert!(matches!(
            &completed.state,
            MigrationProgressState::Finished {
                memory_transmission_info,
            } if memory_transmission_info.memory_bytes_total == 0x1337
        ));

        let failed = {
            let mut state = completing.clone();
            let error = anyhow!("Some very bad error");
            let error: &dyn Error = error.as_ref();
            state.mark_as_failed(error);
            state
        };
        match &failed.state {
            MigrationProgressState::Failed {
                memory_transmission_info,
                error_msg,
                ..
            } => {
                assert_eq!(error_msg.as_str(), "Some very bad error");
                assert_eq!(memory_transmission_info.memory_iteration, 7);
            }
            other => panic!("unexpected state: {other:?}"),
        }

        let cancelled = {
            let mut state = completing.clone();
            state.mark_as_cancelled();
            state
        };
        assert!(matches!(
            &cancelled.state,
            MigrationProgressState::Cancelled {
                memory_transmission_info,
            } if memory_transmission_info.memory_iteration == 7
        ));

        let vals = [
            starting,
            memory_precopy,
            completing,
            completed,
            failed,
            cancelled,
        ];
        for val in vals {
            println!(
                "{:?}:\n{}\n\n",
                val,
                serde_json::to_string_pretty(&val).unwrap()
            );
        }
    }
}

0 commit comments

Comments
 (0)