Skip to content

Commit 34e6bb0

Browse files
committed
feat(vmm): add userfault channels to vm and vcpus
These will be used to communicate vCPU faults between vCPUs and the VM if secret freedom is enabled. Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent c25ecbf commit 34e6bb0

File tree

4 files changed

+112
-9
lines changed

4 files changed

+112
-9
lines changed

src/vmm/src/builder.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ fn create_vmm_and_vcpus(
162162
// Instantiate ACPI device manager.
163163
let acpi_device_manager = ACPIDeviceManager::new();
164164

165-
let (vcpus, vcpus_exit_evt) = vm.create_vcpus(vcpu_count)?;
165+
let (vcpus, vcpus_exit_evt, userfault_channels) = vm.create_vcpus(vcpu_count, secret_free)?;
166166

167167
#[cfg(target_arch = "x86_64")]
168168
let pio_device_manager = {
@@ -192,6 +192,7 @@ fn create_vmm_and_vcpus(
192192
vm,
193193
uffd: None,
194194
uffd_socket: None,
195+
userfault_channels,
195196
vcpus_handles: Vec::new(),
196197
vcpus_exit_evt,
197198
resource_allocator,
@@ -1034,7 +1035,7 @@ pub(crate) mod tests {
10341035
)
10351036
.unwrap();
10361037

1037-
let (_, vcpus_exit_evt) = vm.create_vcpus(1).unwrap();
1038+
let (_, vcpus_exit_evt, _) = vm.create_vcpus(1, false).unwrap();
10381039

10391040
Vmm {
10401041
events_observer: Some(std::io::stdin()),
@@ -1044,6 +1045,7 @@ pub(crate) mod tests {
10441045
vm,
10451046
uffd: None,
10461047
uffd_socket: None,
1048+
userfault_channels: None,
10471049
vcpus_handles: Vec::new(),
10481050
vcpus_exit_evt,
10491051
resource_allocator: ResourceAllocator::new().unwrap(),

src/vmm/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ use crate::vmm_config::instance_info::{InstanceInfo, VmState};
154154
use crate::vstate::memory::{GuestMemory, GuestMemoryMmap, GuestMemoryRegion};
155155
use crate::vstate::vcpu::VcpuState;
156156
pub use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuEvent, VcpuHandle, VcpuResponse};
157+
use crate::vstate::vm::UserfaultChannel;
157158
pub use crate::vstate::vm::Vm;
158159

159160
/// Shorthand type for the EventManager flavour used by Firecracker.
@@ -313,6 +314,8 @@ pub struct Vmm {
313314
uffd: Option<Uffd>,
314315
// Used for userfault communication with the UFFD handler when secret freedom is enabled
315316
uffd_socket: Option<UnixStream>,
317+
// Used for userfault communication with vCPUs when secret freedom is enabled
318+
userfault_channels: Option<Vec<UserfaultChannel>>,
316319
vcpus_handles: Vec<VcpuHandle>,
317320
// Used by Vcpus and devices to initiate teardown; Vmm should never write here.
318321
vcpus_exit_evt: EventFd,

src/vmm/src/vstate/vcpu.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use crate::logger::{IncMetric, METRICS};
3131
use crate::seccomp::{BpfProgram, BpfProgramRef};
3232
use crate::utils::signal::{Killable, register_signal_handler, sigrtmin};
3333
use crate::utils::sm::StateMachine;
34-
use crate::vstate::vm::Vm;
34+
use crate::vstate::vm::{UserfaultChannel, Vm};
3535

3636
/// Signal number (SIGRTMIN) used to kick Vcpus.
3737
pub const VCPU_RTSIG_OFFSET: i32 = 0;
@@ -109,6 +109,8 @@ pub struct Vcpu {
109109
response_receiver: Option<Receiver<VcpuResponse>>,
110110
/// The transmitting end of the responses channel owned by the vcpu side.
111111
response_sender: Sender<VcpuResponse>,
112+
/// Channel for communicating userfaults with the VMM thread
113+
userfault_channel: Option<UserfaultChannel>,
112114
}
113115

114116
impl Vcpu {
@@ -201,7 +203,13 @@ impl Vcpu {
201203
/// * `index` - Represents the 0-based CPU index between [0, max vcpus).
202204
/// * `vm` - The vm to which this vcpu will get attached.
203205
/// * `exit_evt` - An `EventFd` that will be written into when this vcpu exits.
204-
pub fn new(index: u8, vm: &Vm, exit_evt: EventFd) -> Result<Self, VcpuError> {
206+
/// * `userfault_channel` - An optional userfault channel for handling page faults.
207+
pub fn new(
208+
index: u8,
209+
vm: &Vm,
210+
exit_evt: EventFd,
211+
userfault_channel: Option<UserfaultChannel>,
212+
) -> Result<Self, VcpuError> {
205213
let (event_sender, event_receiver) = channel();
206214
let (response_sender, response_receiver) = channel();
207215
let kvm_vcpu = KvmVcpu::new(index, vm).unwrap();
@@ -215,6 +223,7 @@ impl Vcpu {
215223
#[cfg(feature = "gdb")]
216224
gdb_event: None,
217225
kvm_vcpu,
226+
userfault_channel,
218227
})
219228
}
220229

@@ -922,7 +931,7 @@ pub(crate) mod tests {
922931
pub(crate) fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, Vcpu) {
923932
let (kvm, mut vm) = setup_vm_with_memory(mem_size);
924933

925-
let (mut vcpus, _) = vm.create_vcpus(1).unwrap();
934+
let (mut vcpus, _, _) = vm.create_vcpus(1, false).unwrap();
926935
let mut vcpu = vcpus.remove(0);
927936

928937
#[cfg(target_arch = "aarch64")]

src/vmm/src/vstate/vm.rs

Lines changed: 93 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use std::os::fd::{AsFd, AsRawFd, FromRawFd};
1212
use std::path::Path;
1313
use std::sync::Arc;
1414

15+
use bincode::{Decode, Encode};
1516
use kvm_bindings::{
1617
KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEMORY_ATTRIBUTE_PRIVATE, KVMIO,
1718
kvm_create_guest_memfd, kvm_memory_attributes, kvm_userspace_memory_region,
@@ -36,6 +37,47 @@ use crate::{DirtyBitmap, Vcpu, mem_size_mib};
3637

3738
pub(crate) const KVM_GMEM_NO_DIRECT_MAP: u64 = 1;
3839

40+
/// KVM userfault information
41+
#[derive(Copy, Clone, Decode, Default, Eq, PartialEq, Debug, Encode)]
42+
pub struct UserfaultData {
43+
/// Flags
44+
pub flags: u64,
45+
/// Guest physical address
46+
pub gpa: u64,
47+
/// Size
48+
pub size: u64,
49+
}
50+
51+
/// KVM userfault channel.
///
/// One endpoint of a bidirectional, pipe-backed link. As constructed by
/// `Vm::create_userfault_channels`, the two fields belong to two *different* pipes: `sender`
/// is the write end of the pipe towards the peer and `receiver` is the read end of the pipe
/// coming from the peer.
#[derive(Debug)]
pub struct UserfaultChannel {
    /// Write end of the outgoing pipe; data written here is read by the peer's `receiver`.
    pub sender: File,
    /// Read end of the incoming pipe; yields data written by the peer's `sender`.
    pub receiver: File,
}
59+
60+
fn pipe2(flags: libc::c_int) -> std::io::Result<(File, File)> {
61+
let mut fds = [0, 0];
62+
63+
// SAFETY: pipe2() is safe to call with a valid mutable pointer to an array of 2 integers
64+
// The fds array is stack-allocated and lives for the entire unsafe block.
65+
let res = unsafe { libc::pipe2(fds.as_mut_ptr(), flags) };
66+
67+
if res == 0 {
68+
Ok((
69+
// SAFETY: fds[0] contains a valid file descriptor for the read end of the pipe
70+
// We only convert successful pipe2() calls, and each fd is used exactly once.
71+
unsafe { File::from_raw_fd(fds[0]) },
72+
// SAFETY: fds[1] contains a valid file descriptor for the write end of the pipe
73+
// We only convert successful pipe2() calls, and each fd is used exactly once.
74+
unsafe { File::from_raw_fd(fds[1]) },
75+
))
76+
} else {
77+
Err(std::io::Error::last_os_error())
78+
}
79+
}
80+
3981
/// Architecture independent parts of a VM.
4082
#[derive(Debug)]
4183
pub struct VmCommon {
@@ -60,6 +102,8 @@ pub enum VmError {
60102
Arch(#[from] ArchVmError),
61103
/// Error during eventfd operations: {0}
62104
EventFd(std::io::Error),
105+
/// Failed to create a userfault channel: {0}
106+
UserfaultChannel(std::io::Error),
63107
/// Failed to create vcpu: {0}
64108
CreateVcpu(VcpuError),
65109
/// The number of configured slots is bigger than the maximum reported by KVM
@@ -154,24 +198,69 @@ impl Vm {
154198
})
155199
}
156200

201+
fn create_userfault_channels(
202+
&self,
203+
secret_free: bool,
204+
) -> Result<(Option<UserfaultChannel>, Option<UserfaultChannel>), std::io::Error> {
205+
if secret_free {
206+
let (receiver_vcpu_to_vm, sender_vcpu_to_vm) = pipe2(libc::O_NONBLOCK)?;
207+
let (receiver_vm_to_vcpu, sender_vm_to_vcpu) = pipe2(0)?;
208+
Ok((
209+
Some(UserfaultChannel {
210+
sender: sender_vcpu_to_vm,
211+
receiver: receiver_vm_to_vcpu,
212+
}),
213+
Some(UserfaultChannel {
214+
sender: sender_vm_to_vcpu,
215+
receiver: receiver_vcpu_to_vm,
216+
}),
217+
))
218+
} else {
219+
Ok((None, None))
220+
}
221+
}
222+
157223
/// Creates the specified number of [`Vcpu`]s.
158224
///
159225
/// The returned [`EventFd`] is written to whenever any of the vcpus exit.
160-
pub fn create_vcpus(&mut self, vcpu_count: u8) -> Result<(Vec<Vcpu>, EventFd), VmError> {
226+
pub fn create_vcpus(
227+
&mut self,
228+
vcpu_count: u8,
229+
secret_free: bool,
230+
) -> Result<(Vec<Vcpu>, EventFd, Option<Vec<UserfaultChannel>>), VmError> {
161231
self.arch_pre_create_vcpus(vcpu_count)?;
162232

163233
let exit_evt = EventFd::new(libc::EFD_NONBLOCK).map_err(VmError::EventFd)?;
164234

165235
let mut vcpus = Vec::with_capacity(vcpu_count as usize);
236+
let mut userfault_channels = Vec::with_capacity(vcpu_count as usize);
166237
for cpu_idx in 0..vcpu_count {
167238
let exit_evt = exit_evt.try_clone().map_err(VmError::EventFd)?;
168-
let vcpu = Vcpu::new(cpu_idx, self, exit_evt).map_err(VmError::CreateVcpu)?;
239+
240+
let (vcpu_channel, vmm_channel) = self
241+
.create_userfault_channels(secret_free)
242+
.map_err(VmError::UserfaultChannel)?;
243+
244+
let vcpu =
245+
Vcpu::new(cpu_idx, self, exit_evt, vcpu_channel).map_err(VmError::CreateVcpu)?;
169246
vcpus.push(vcpu);
247+
248+
if secret_free {
249+
userfault_channels.push(vmm_channel.unwrap());
250+
}
170251
}
171252

172253
self.arch_post_create_vcpus(vcpu_count)?;
173254

174-
Ok((vcpus, exit_evt))
255+
Ok((
256+
vcpus,
257+
exit_evt,
258+
if secret_free {
259+
Some(userfault_channels)
260+
} else {
261+
None
262+
},
263+
))
175264
}
176265

177266
/// Create a guest_memfd of the specified size
@@ -602,7 +691,7 @@ pub(crate) mod tests {
602691
let vcpu_count = 2;
603692
let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128));
604693

605-
let (vcpu_vec, _) = vm.create_vcpus(vcpu_count).unwrap();
694+
let (vcpu_vec, _, _) = vm.create_vcpus(vcpu_count, false).unwrap();
606695

607696
assert_eq!(vcpu_vec.len(), vcpu_count as usize);
608697
}

0 commit comments

Comments
 (0)