Skip to content

Commit f4bae29

Browse files
committed
vfio-ioctls: Support hardware-accelerated nested HWPT via iommufd
Add infrastructure to enable VFIO devices to leverage hardware IOMMU acceleration through iommufd's uAPIs. This allows userspace VMMs to attach VFIO devices to hardware-accelerated virtual IOMMUs, particularly enabling userspace to configure stage-1 (guest-managed) page tables that are composed with stage-2 (host-managed) page tables in hardware. This depends on the IommufdVIOMMU and IommufdVDevice abstractions introduced in the iommufd-ioctls crate [1]. New Public Interfaces: 1. VfioIommufd::new() signature change: - Added `s1_hwpt_data_type: Option<iommu_hwpt_data_type>` parameter - When `Some`, enables nested translation mode for subsequently attached VFIO devices - Supported types: IOMMU_HWPT_DATA_ARM_SMMUV3, IOMMU_HWPT_DATA_VTD_S1 2. VfioDevice::new_with_iommufd(): - New constructor for vfio devices backed by iommufd with hardware-accelerated nested HWPT support - Automatically creates IommufdVIommu/IommufdVDevice when nested mode is enabled via `VfioIommufd` - Supports sharing a single `IommufdVIommu` instance across multiple VFIO devices - Returns `IommufdVDevice` handle for subsequent S1 HWPT operations - Attaches device to bypass HWPT by default (until guest enables IOMMU) 3. VfioDevice::install_s1_hwpt(): - Install guest-configured stage-1 page tables into hardware - Called when guest writes to virtual IOMMU stream table entries - Atomically replaces existing S1 HWPT if present - Uses `IommufdHwptData` enum for type-safe hardware-specific configuration 4. VfioDevice::uninstall_s1_hwpt(): - Revert device to bypass or abort mode - abort=true: Use abort HWPT (fault all DMA) - abort=false: Use bypass HWPT (passthrough translation) - Called during guest IOMMU reset or shutdown Dependencies on iommufd-ioctls: This implementation builds upon three types from iommufd-ioctls [1]: - `IommufdVIommu`: Represents a physical IOMMU slice managing S2 HWPT and default S1 HWPTs (bypass/abort). Shared across devices behind the same virtual IOMMU. 
- `IommufdVDevice`: Represents a device attached to an `IommufdVIommu`. Handles dynamic S1 HWPT allocation and lifecycle management. - `IommufdHwptData`: Type-safe enum for architecture-specific HWPT configuration (SMMUv3 STE data, VT-d context entries). Integration Notes for VMMs: 1. VMM creates `VfioIommufd` with `s1_hwpt_data_type` if hardware accelerated virtual IOMMUs are enabled and used to manage VFIO devices 2. VMM calls `VfioDevice::new_with_iommufd()` per passthrough device - The same instance of virtual IOMMU should reuse the same instance of `IommufdVIommu` - Each VFIO device will have its own `VfioDevice` and `IommufdVDevice` instance 3. VMM needs to make sure the virtual IOMMU is compatible with the physical IOMMU: - `IommufdVDevice::get_hw_info` is used to retrieve hardware information of the physical IOMMU 4. VMM traps guest IOMMU commands and calls: - `install_s1_hwpt()` when guest enables IOMMU - `uninstall_s1_hwpt()` when guest disables IOMMU - `IommufdVIommu::invalidate_hwpt()` when the guest invalidates IOTLB entries This allows a VMM to use a hardware-accelerated IOMMU to manage VFIO devices and lets the physical IOMMU hardware process guest page tables directly. [1] cloud-hypervisor/iommufd#5 Signed-off-by: Bo Chen <bchen@crusoe.ai>
1 parent c437d21 commit f4bae29

File tree

3 files changed

+212
-3
lines changed

3 files changed

+212
-3
lines changed

vfio-ioctls/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,5 +36,5 @@ mshv-bindings = { version = "0.6.5", features = [
3636
"fam-wrappers",
3737
], optional = true }
3838
mshv-ioctls = { version = "0.6.5", optional = true }
39-
iommufd-bindings = { git = "https://github.com/cloud-hypervisor/iommufd", rev = "083c016", optional = true }
40-
iommufd-ioctls = { git = "https://github.com/cloud-hypervisor/iommufd", rev = "083c016", optional = true }
39+
iommufd-bindings = { git = "https://github.com/likebreath/iommufd", branch = "0129/rfc_viommu_vdevice", optional = true }
40+
iommufd-ioctls = { git = "https://github.com/likebreath/iommufd", branch = "0129/rfc_viommu_vdevice", optional = true }

vfio-ioctls/src/lib.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,21 @@ pub enum VfioError {
167167
#[cfg(feature = "vfio_cdev")]
168168
#[error("failed iommufd ioctl")]
169169
IommufdIoctlError(#[source] IommufdError),
170+
#[cfg(feature = "vfio_cdev")]
171+
#[error("missing virt_sid for S1 HWPT setup")]
172+
MissingVirtSid,
173+
#[cfg(feature = "vfio_cdev")]
174+
#[error("failed to create iommufd vIOMMU")]
175+
NewIommufdVIommu(#[source] IommufdError),
176+
#[cfg(feature = "vfio_cdev")]
177+
#[error("failed to create iommufd vDevice")]
178+
NewIommufdVDevice(#[source] IommufdError),
179+
#[cfg(feature = "vfio_cdev")]
180+
#[error("failed to destroy s1 hwpt")]
181+
IommufdS1HwptDestroy(#[source] IommufdError),
182+
#[cfg(feature = "vfio_cdev")]
183+
#[error("failed to allocate s1 hwpt")]
184+
IommufdS1HwptAlloc(#[source] IommufdError),
170185
}
171186

172187
/// Specialized version of `Result` for VFIO subsystem.

vfio-ioctls/src/vfio_device.rs

Lines changed: 195 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use byteorder::{ByteOrder, NativeEndian};
1717
#[cfg(feature = "vfio_cdev")]
1818
use iommufd_bindings::*;
1919
#[cfg(feature = "vfio_cdev")]
20-
use iommufd_ioctls::IommuFd;
20+
use iommufd_ioctls::{IommuFd, IommufdHwptData, IommufdVDevice, IommufdVIommu};
2121
use log::{debug, error, warn};
2222
use vfio_bindings::bindings::vfio::*;
2323
use vm_memory::{Address, GuestMemory, GuestMemoryRegion, MemoryRegionAddress};
@@ -561,6 +561,7 @@ pub struct VfioIommufd {
561561
pub(crate) iommufd: Arc<IommuFd>,
562562
pub(crate) ioas_id: u32,
563563
common: VfioCommon,
564+
s1_hwpt_data_type: Option<iommu_hwpt_data_type>,
564565
}
565566

566567
#[cfg(feature = "vfio_cdev")]
@@ -572,10 +573,14 @@ impl VfioIommufd {
572573
/// * `iommufd`: the iommufd to be bound with the VFIO device
573574
/// * `ioas_id`: the IOAS id to be bound with the VFIO device
574575
/// * `device_fd`: An optional file handle of the hypervisor VFIO device.
576+
/// * `s1_hwpt_data_type`: An optional IOMMU hardware page table data type.
577+
/// - If `None`, nested HWPT is disabled.
578+
/// - If `Some`, nested HWPT is enabled with the provided data type.
575579
pub fn new(
576580
iommufd: Arc<IommuFd>,
577581
ioas_id: Option<u32>,
578582
device_fd: Option<VfioContainerDeviceHandle>,
583+
s1_hwpt_data_type: Option<iommu_hwpt_data_type>,
579584
) -> Result<Self> {
580585
let ioas_id = match ioas_id {
581586
Some(ioas_id) => ioas_id,
@@ -599,6 +604,7 @@ impl VfioIommufd {
599604
iommufd,
600605
ioas_id,
601606
common: VfioCommon { device_fd },
607+
s1_hwpt_data_type,
602608
};
603609

604610
Ok(vfio_iommufd)
@@ -1120,6 +1126,194 @@ impl VfioDevice {
11201126
})
11211127
}
11221128

1129+
#[cfg(feature = "vfio_cdev")]
1130+
/// Creates a new VFIO device backed by IOMMUFD.
1131+
///
1132+
/// This initializes a VFIO device with support for vIOMMU and vDevice abstractions
1133+
/// when nested hardware page tables (HWPT) are configured via the `VfioIommufd` instance.
1134+
///
1135+
/// # Arguments
1136+
///
1137+
/// * `sysfspath` - Path to the VFIO device in sysfs.
1138+
///   *Note: Future versions may support file descriptor interfaces to be more versatile.*
1139+
/// * `vfio_ops` - The VFIO operations wrapper (must be a `VfioIommufd` instance).
1140+
/// * `viommu` - An optional vIOMMU instance.
1141+
///   - If `None` and nested HWPT is enabled, a new vIOMMU instance is created and written
///     back into `viommu`, so the caller can pass it in again for the next device.
1142+
///   - If `Some`, the provided instance is reused.
1143+
///   *Note: The reused vIOMMU must be associated with the same physical IOMMU as this device;
1144+
///   otherwise, vDevice creation will fail and an error will be returned.*
1145+
/// * `virt_sid` - The Virtual Stream ID. This is required if `s1_hwpt_data_type` is
1146+
///   configured in the `VfioIommufd` instance (i.e., nested HWPT is active).
1147+
///
1148+
/// # Returns
1149+
///
1150+
/// A tuple containing the initialized `VfioDevice` and an optional `IommufdVDevice`
1151+
/// (present only when nested HWPT is configured).
///
/// # Errors
///
/// * `VfioError::DowncastVfioOps` if `vfio_ops` is not a `VfioIommufd`.
/// * `VfioError::MissingVirtSid` if nested HWPT is enabled but `virt_sid` is `None`.
/// * `VfioError::NewIommufdVIommu` if a new vIOMMU has to be created and that fails.
/// * `VfioError::NewIommufdVDevice` if creating the vDevice fails.
/// * Any error from opening the cdev, registering the device fd, the bind/attach
///   calls, or querying device info, regions and IRQs is propagated unchanged.
1152+
pub fn new_with_iommufd(
1153+
sysfspath: &Path,
1154+
vfio_ops: Arc<dyn VfioOps>,
1155+
viommu: &mut Option<Arc<IommufdVIommu>>,
1156+
virt_sid: Option<u64>,
1157+
) -> Result<(Self, Option<IommufdVDevice>)> {
// This constructor only works with the iommufd backend; reject any other `VfioOps`.
1158+
let vfio_iommufd =
1159+
if let Some(vfio_iommufd) = vfio_ops.as_any().downcast_ref::<VfioIommufd>() {
1160+
vfio_iommufd
1161+
} else {
1162+
return Err(VfioError::DowncastVfioOps);
1163+
};
1164+
1165+
let (device_info, iommufd_vdevice) = {
1166+
// Open the vfio cdev file
1167+
let device = Self::get_device_cdev_from_path(sysfspath)?;
1168+
1169+
// Add the vfio cdev file to VFIO-KVM device tracking
1170+
vfio_iommufd
1171+
.common
1172+
.device_set_fd(device.as_raw_fd(), true)?;
1173+
1174+
// Bind the VFIO device to the iommufd file
// `out_devid` is passed in as 0 and read back below as the device id for the
// vIOMMU/vDevice constructors.
1175+
let mut bind = vfio_device_bind_iommufd {
1176+
argsz: mem::size_of::<vfio_device_bind_iommufd>() as u32,
1177+
flags: 0,
1178+
iommufd: vfio_iommufd.iommufd.as_raw_fd(),
1179+
out_devid: 0,
1180+
};
1181+
vfio_syscall::bind_device_iommufd(&device, &mut bind)?;
1182+
1183+
let iommufd_vdevice = match vfio_iommufd.s1_hwpt_data_type {
1184+
// When no s1 hwpt is used, associate the vfio device to the IOAS within the bound iommufd
1185+
None => {
1186+
let mut attach_data = vfio_device_attach_iommufd_pt {
1187+
argsz: mem::size_of::<vfio_device_attach_iommufd_pt>() as u32,
1188+
flags: 0,
1189+
pt_id: vfio_iommufd.ioas_id,
1190+
};
1191+
vfio_syscall::attach_device_iommufd_pt(&device, &mut attach_data)?;
1192+
1193+
None
1194+
}
1195+
// When s1 hwpt is used, create and attach vIOMMU and vDevice for nested (s1+s2) hwpt setup
1196+
Some(s1_hwpt_data_type) => {
// NOTE(review): this early return happens after the device fd was already
// registered and bound above; no explicit rollback is visible in this
// function - confirm cleanup is handled elsewhere.
1197+
let virt_id = if let Some(virt_sid) = virt_sid {
1198+
virt_sid
1199+
} else {
1200+
return Err(VfioError::MissingVirtSid);
1201+
};
1202+
1203+
let viommu = if let Some(viommu) = viommu {
1204+
// Reuse the passed in vIOMMU instance if available
1205+
viommu.clone()
1206+
} else {
1207+
// Allocate an instance of vIOMMU for the vfio device if no instance is passed in
1208+
let new_viommu = IommufdVIommu::new(
1209+
vfio_iommufd.iommufd.clone(),
1210+
vfio_iommufd.ioas_id,
1211+
bind.out_devid,
1212+
s1_hwpt_data_type,
1213+
)
1214+
.map_err(VfioError::NewIommufdVIommu)?;
1215+
1216+
// Hand the new vIOMMU back to the caller through the `&mut Option`.
// NOTE(review): this write happens before the vDevice is created and the
// device is attached, so `*viommu` stays populated even if a later step fails.
let viommu_arc = Arc::new(new_viommu);
1217+
*viommu = Some(viommu_arc.clone());
1218+
1219+
viommu_arc
1220+
};
1221+
1222+
// Allocate an instance of vDevice
1223+
let vdevice = IommufdVDevice::new(viommu.clone(), bind.out_devid, virt_id)
1224+
.map_err(VfioError::NewIommufdVDevice)?;
1225+
1226+
// Attach the vfio cdev device to the s1_bypass_hwpt
1227+
let mut attach_data = vfio_device_attach_iommufd_pt {
1228+
argsz: mem::size_of::<vfio_device_attach_iommufd_pt>() as u32,
1229+
flags: 0,
1230+
pt_id: viommu.bypass_hwpt_id,
1231+
};
1232+
vfio_syscall::attach_device_iommufd_pt(&device, &mut attach_data)?;
1233+
1234+
Some(vdevice)
1235+
}
1236+
};
1237+
1238+
// The second `dev_info` shadows the first: the raw info is consumed to build a
// `VfioDeviceInfo` that takes ownership of `device`.
let dev_info = VfioDeviceInfo::get_device_info(&device)?;
1239+
let dev_info = VfioDeviceInfo::new(device, &dev_info);
1240+
1241+
(dev_info, iommufd_vdevice)
1242+
};
1243+
1244+
let regions = device_info.get_regions()?;
1245+
let irqs = device_info.get_irqs()?;
1246+
1247+
Ok((
1248+
VfioDevice {
1249+
device: ManuallyDrop::new(device_info.device),
1250+
flags: device_info.flags,
1251+
regions,
1252+
irqs,
1253+
sysfspath: sysfspath.to_path_buf(),
1254+
vfio_ops,
1255+
},
1256+
iommufd_vdevice,
1257+
))
1258+
}
1260+
#[cfg(feature = "vfio_cdev")]
1261+
/// Uninstall s1 hwpt for the vfio device.
1262+
///// # Parameters
1263+
/// * `vdevice`: the `IommufdVDevice` instance associated with the vfio device.
1264+
/// * `abort`: if true, use s1 abort_hwpt; if false, use s1 bypass_hwpt.
1265+
pub fn uninstall_s1_hwpt(&self, vdevice: &mut IommufdVDevice, abort: bool) -> Result<()> {
1266+
// Attach to bypass hwpt or abort hwpt based on the 'abort' flag
1267+
let hwpt_id = if abort {
1268+
vdevice.viommu.abort_hwpt_id
1269+
} else {
1270+
vdevice.viommu.bypass_hwpt_id
1271+
};
1272+
let mut attach_data = vfio_device_attach_iommufd_pt {
1273+
argsz: mem::size_of::<vfio_device_attach_iommufd_pt>() as u32,
1274+
flags: 0,
1275+
pt_id: hwpt_id,
1276+
};
1277+
vfio_syscall::attach_device_iommufd_pt(&self.device, &mut attach_data)?;
1278+
1279+
// Destroy s1 hwpt
1280+
vdevice
1281+
.destroy_s1_hwpt()
1282+
.map_err(VfioError::IommufdS1HwptDestroy)?;
1283+
1284+
Ok(())
1285+
}
1286+
1287+
#[cfg(feature = "vfio_cdev")]
1288+
/// Install s1 hwpt for the vfio device based on the input hwpt data.
1289+
///
/// Any previously installed s1 hwpt is torn down first via
/// `uninstall_s1_hwpt(vdevice, true)`, which attaches the device to the abort HWPT.
/// A new s1 hwpt is then allocated from `hwpt_data` and the device is attached to it.
///
1290+
/// # Parameters
1291+
/// * `vdevice`: the `IommufdVDevice` instance associated with the vfio device.
1292+
/// * `hwpt_data`: the hwpt data to create s1 hwpt.
///
/// # Errors
/// * Any error returned by `uninstall_s1_hwpt` or by the final attach call is
///   propagated unchanged.
/// * `VfioError::IommufdS1HwptAlloc` if allocating the new s1 hwpt fails; in that
///   case the device is left attached to the abort HWPT.
1293+
pub fn install_s1_hwpt(
1294+
&self,
1295+
vdevice: &mut IommufdVDevice,
1296+
hwpt_data: &IommufdHwptData,
1297+
) -> Result<()> {
1298+
// Uninstall existing s1 hwpt if exists
// This runs unconditionally, including on the first install.
// NOTE(review): presumably `destroy_s1_hwpt` is a no-op when nothing is
// installed - confirm against iommufd-ioctls.
1299+
self.uninstall_s1_hwpt(vdevice, true)?;
1300+
1301+
// Create s1 hwpt based on the input data
1302+
let s1_hwpt_id = vdevice
1303+
.allocate_s1_hwpt(hwpt_data)
1304+
.map_err(VfioError::IommufdS1HwptAlloc)?;
1305+
1306+
// Attach the vfio device to the newly created s1 hwpt
1307+
let mut attach_data = vfio_device_attach_iommufd_pt {
1308+
argsz: mem::size_of::<vfio_device_attach_iommufd_pt>() as u32,
1309+
flags: 0,
1310+
pt_id: s1_hwpt_id,
1311+
};
1312+
vfio_syscall::attach_device_iommufd_pt(&self.device, &mut attach_data)?;
1313+
1314+
Ok(())
1315+
}
1316+
11231317
/// VFIO device reset only if the device supports being reset.
11241318
pub fn reset(&self) {
11251319
if self.flags & VFIO_DEVICE_FLAGS_RESET != 0 {

0 commit comments

Comments
 (0)