Skip to content

Commit d349e91

Browse files
committed
feat(vmm): add userfault channels to vm and vcpus
These channels will be used to communicate vCPU faults between the vCPUs and the VMM when secret freedom is enabled.

Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent 8bcc2c2 commit d349e91

File tree

4 files changed

+110
-9
lines changed

4 files changed

+110
-9
lines changed

src/vmm/src/builder.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ fn create_vmm_and_vcpus(
162162
// Instantiate ACPI device manager.
163163
let acpi_device_manager = ACPIDeviceManager::new();
164164

165-
let (vcpus, vcpus_exit_evt) = vm.create_vcpus(vcpu_count)?;
165+
let (vcpus, vcpus_exit_evt, userfault_channels) = vm.create_vcpus(vcpu_count, secret_free)?;
166166

167167
#[cfg(target_arch = "x86_64")]
168168
let pio_device_manager = {
@@ -192,6 +192,7 @@ fn create_vmm_and_vcpus(
192192
vm,
193193
uffd: None,
194194
uffd_socket: None,
195+
userfault_channels,
195196
vcpus_handles: Vec::new(),
196197
vcpus_exit_evt,
197198
resource_allocator,
@@ -1034,7 +1035,7 @@ pub(crate) mod tests {
10341035
)
10351036
.unwrap();
10361037

1037-
let (_, vcpus_exit_evt) = vm.create_vcpus(1).unwrap();
1038+
let (_, vcpus_exit_evt, _) = vm.create_vcpus(1, false).unwrap();
10381039

10391040
Vmm {
10401041
events_observer: Some(std::io::stdin()),
@@ -1044,6 +1045,7 @@ pub(crate) mod tests {
10441045
vm,
10451046
uffd: None,
10461047
uffd_socket: None,
1048+
userfault_channels: None,
10471049
vcpus_handles: Vec::new(),
10481050
vcpus_exit_evt,
10491051
resource_allocator: ResourceAllocator::new().unwrap(),

src/vmm/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ use crate::vmm_config::instance_info::{InstanceInfo, VmState};
154154
use crate::vstate::memory::{GuestMemory, GuestMemoryMmap, GuestMemoryRegion};
155155
use crate::vstate::vcpu::VcpuState;
156156
pub use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuEvent, VcpuHandle, VcpuResponse};
157+
use crate::vstate::vm::UserfaultChannel;
157158
pub use crate::vstate::vm::Vm;
158159

159160
/// Shorthand type for the EventManager flavour used by Firecracker.
@@ -313,6 +314,8 @@ pub struct Vmm {
313314
uffd: Option<Uffd>,
314315
// Used for userfault communication with the UFFD handler when secret freedom is enabled
315316
uffd_socket: Option<UnixStream>,
317+
// Used for userfault communication with vCPUs when secret freedom is enabled
318+
userfault_channels: Option<Vec<UserfaultChannel>>,
316319
vcpus_handles: Vec<VcpuHandle>,
317320
// Used by Vcpus and devices to initiate teardown; Vmm should never write here.
318321
vcpus_exit_evt: EventFd,

src/vmm/src/vstate/vcpu.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use crate::logger::{IncMetric, METRICS};
3131
use crate::seccomp::{BpfProgram, BpfProgramRef};
3232
use crate::utils::signal::{Killable, register_signal_handler, sigrtmin};
3333
use crate::utils::sm::StateMachine;
34-
use crate::vstate::vm::Vm;
34+
use crate::vstate::vm::{UserfaultChannel, Vm};
3535

3636
/// Signal number (SIGRTMIN) used to kick Vcpus.
3737
pub const VCPU_RTSIG_OFFSET: i32 = 0;
@@ -109,6 +109,8 @@ pub struct Vcpu {
109109
response_receiver: Option<Receiver<VcpuResponse>>,
110110
/// The transmitting end of the responses channel owned by the vcpu side.
111111
response_sender: Sender<VcpuResponse>,
112+
/// Channel for communicating userfaults with the VMM thread
113+
userfault_channel: Option<UserfaultChannel>,
112114
}
113115

114116
impl Vcpu {
@@ -201,7 +203,13 @@ impl Vcpu {
201203
/// * `index` - Represents the 0-based CPU index between [0, max vcpus).
202204
/// * `vm` - The vm to which this vcpu will get attached.
203205
/// * `exit_evt` - An `EventFd` that will be written into when this vcpu exits.
204-
pub fn new(index: u8, vm: &Vm, exit_evt: EventFd) -> Result<Self, VcpuError> {
206+
/// * `userfault_channel` - An optional userfault channel for handling page faults.
207+
pub fn new(
208+
index: u8,
209+
vm: &Vm,
210+
exit_evt: EventFd,
211+
userfault_channel: Option<UserfaultChannel>,
212+
) -> Result<Self, VcpuError> {
205213
let (event_sender, event_receiver) = channel();
206214
let (response_sender, response_receiver) = channel();
207215
let kvm_vcpu = KvmVcpu::new(index, vm).unwrap();
@@ -215,6 +223,7 @@ impl Vcpu {
215223
#[cfg(feature = "gdb")]
216224
gdb_event: None,
217225
kvm_vcpu,
226+
userfault_channel,
218227
})
219228
}
220229

@@ -922,7 +931,7 @@ pub(crate) mod tests {
922931
pub(crate) fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, Vcpu) {
923932
let (kvm, mut vm) = setup_vm_with_memory(mem_size);
924933

925-
let (mut vcpus, _) = vm.create_vcpus(1).unwrap();
934+
let (mut vcpus, _, _) = vm.create_vcpus(1, false).unwrap();
926935
let mut vcpu = vcpus.remove(0);
927936

928937
#[cfg(target_arch = "aarch64")]

src/vmm/src/vstate/vm.rs

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use std::os::fd::{AsFd, AsRawFd, FromRawFd};
1212
use std::path::Path;
1313
use std::sync::Arc;
1414

15+
use bincode::{Decode, Encode};
1516
use kvm_bindings::{
1617
KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEMORY_ATTRIBUTE_PRIVATE, KVMIO,
1718
kvm_create_guest_memfd, kvm_memory_attributes, kvm_userspace_memory_region,
@@ -36,6 +37,47 @@ use crate::{DirtyBitmap, Vcpu, mem_size_mib};
3637

3738
pub(crate) const KVM_GMEM_NO_DIRECT_MAP: u64 = 1;
3839

40+
/// KVM userfault information
41+
#[derive(Copy, Clone, Decode, Default, Eq, PartialEq, Debug, Encode)]
42+
pub struct UserfaultData {
43+
/// Flags
44+
pub flags: u64,
45+
/// Guest physical address
46+
pub gpa: u64,
47+
/// Size
48+
pub size: u64,
49+
}
50+
51+
/// KVM userfault channel
52+
#[derive(Debug)]
53+
pub struct UserfaultChannel {
54+
/// Sender
55+
pub sender: File,
56+
/// Receiver
57+
pub receiver: File,
58+
}
59+
60+
fn pipe2(flags: libc::c_int) -> std::io::Result<(File, File)> {
61+
let mut fds = [0, 0];
62+
63+
// SAFETY: pipe2() is safe to call with a valid mutable pointer to an array of 2 integers
64+
// The fds array is stack-allocated and lives for the entire unsafe block.
65+
let res = unsafe { libc::pipe2(fds.as_mut_ptr(), flags) };
66+
67+
if res == 0 {
68+
Ok((
69+
// SAFETY: fds[0] contains a valid file descriptor for the read end of the pipe
70+
// We only convert successful pipe2() calls, and each fd is used exactly once.
71+
unsafe { File::from_raw_fd(fds[0]) },
72+
// SAFETY: fds[1] contains a valid file descriptor for the write end of the pipe
73+
// We only convert successful pipe2() calls, and each fd is used exactly once.
74+
unsafe { File::from_raw_fd(fds[1]) },
75+
))
76+
} else {
77+
Err(std::io::Error::last_os_error())
78+
}
79+
}
80+
3981
/// Architecture independent parts of a VM.
4082
#[derive(Debug)]
4183
pub struct VmCommon {
@@ -60,6 +102,8 @@ pub enum VmError {
60102
Arch(#[from] ArchVmError),
61103
/// Error during eventfd operations: {0}
62104
EventFd(std::io::Error),
105+
/// Failed to create a userfault channel: {0}
106+
UserfaultChannel(std::io::Error),
63107
/// Failed to create vcpu: {0}
64108
CreateVcpu(VcpuError),
65109
/// The number of configured slots is bigger than the maximum reported by KVM
@@ -92,6 +136,8 @@ struct kvm_userspace_memory_region2 {
92136
pad2: [u64; 13],
93137
}
94138

139+
/// Result of [`Vm::create_vcpus`]: the created vCPUs, the exit [`EventFd`], and the VMM-side
/// userfault channels (`None` unless secret freedom was requested).
type VcpuCreationResult = Result<(Vec<Vcpu>, EventFd, Option<Vec<UserfaultChannel>>), VmError>;
140+
95141
/// Contains Vm functions that are usable across CPU architectures
96142
impl Vm {
97143
/// Create a KVM VM
@@ -154,24 +200,65 @@ impl Vm {
154200
})
155201
}
156202

203+
fn create_userfault_channels(
204+
&self,
205+
secret_free: bool,
206+
) -> Result<(Option<UserfaultChannel>, Option<UserfaultChannel>), std::io::Error> {
207+
if secret_free {
208+
let (receiver_vcpu_to_vm, sender_vcpu_to_vm) = pipe2(libc::O_NONBLOCK)?;
209+
let (receiver_vm_to_vcpu, sender_vm_to_vcpu) = pipe2(0)?;
210+
Ok((
211+
Some(UserfaultChannel {
212+
sender: sender_vcpu_to_vm,
213+
receiver: receiver_vm_to_vcpu,
214+
}),
215+
Some(UserfaultChannel {
216+
sender: sender_vm_to_vcpu,
217+
receiver: receiver_vcpu_to_vm,
218+
}),
219+
))
220+
} else {
221+
Ok((None, None))
222+
}
223+
}
224+
157225
/// Creates the specified number of [`Vcpu`]s.
158226
///
159227
/// The returned [`EventFd`] is written to whenever any of the vcpus exit.
160-
pub fn create_vcpus(&mut self, vcpu_count: u8) -> Result<(Vec<Vcpu>, EventFd), VmError> {
228+
pub fn create_vcpus(&mut self, vcpu_count: u8, secret_free: bool) -> VcpuCreationResult {
161229
self.arch_pre_create_vcpus(vcpu_count)?;
162230

163231
let exit_evt = EventFd::new(libc::EFD_NONBLOCK).map_err(VmError::EventFd)?;
164232

165233
let mut vcpus = Vec::with_capacity(vcpu_count as usize);
234+
let mut userfault_channels = Vec::with_capacity(vcpu_count as usize);
166235
for cpu_idx in 0..vcpu_count {
167236
let exit_evt = exit_evt.try_clone().map_err(VmError::EventFd)?;
168-
let vcpu = Vcpu::new(cpu_idx, self, exit_evt).map_err(VmError::CreateVcpu)?;
237+
238+
let (vcpu_channel, vmm_channel) = self
239+
.create_userfault_channels(secret_free)
240+
.map_err(VmError::UserfaultChannel)?;
241+
242+
let vcpu =
243+
Vcpu::new(cpu_idx, self, exit_evt, vcpu_channel).map_err(VmError::CreateVcpu)?;
169244
vcpus.push(vcpu);
245+
246+
if secret_free {
247+
userfault_channels.push(vmm_channel.unwrap());
248+
}
170249
}
171250

172251
self.arch_post_create_vcpus(vcpu_count)?;
173252

174-
Ok((vcpus, exit_evt))
253+
Ok((
254+
vcpus,
255+
exit_evt,
256+
if secret_free {
257+
Some(userfault_channels)
258+
} else {
259+
None
260+
},
261+
))
175262
}
176263

177264
/// Create a guest_memfd of the specified size
@@ -605,7 +692,7 @@ pub(crate) mod tests {
605692
let vcpu_count = 2;
606693
let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128));
607694

608-
let (vcpu_vec, _) = vm.create_vcpus(vcpu_count).unwrap();
695+
let (vcpu_vec, _, _) = vm.create_vcpus(vcpu_count, false).unwrap();
609696

610697
assert_eq!(vcpu_vec.len(), vcpu_count as usize);
611698
}

0 commit comments

Comments
 (0)