Skip to content

Commit c3a9795

Browse files
committed
vfio-ioctls: Support hardware-accelerated nested HWPT via iommufd
Add infrastructure to enable VFIO devices to leverage hardware IOMMU acceleration through iommufd's uAPIs. This allows userspace VMMs to attach VFIO devices to hardware-accelerated virtual IOMMUs, particularly enabling userspace to configure stage-1 (guest-managed) page tables that are composed with stage-2 (host-managed) page tables in hardware. This depends on the `IommufdVIommu` and `IommufdVDevice` abstractions introduced in the iommufd-ioctls crate [1]. New Public Interfaces: 1. VfioIommufd::new() signature change: - Added `s1_hwpt_data_type: Option<iommu_hwpt_data_type>` parameter - When `Some`, enables nested translation mode for subsequently attached VFIO devices - Supported types: IOMMU_HWPT_DATA_ARM_SMMUV3, IOMMU_HWPT_DATA_VTD_S1 2. VfioDevice::new_with_iommufd(): - New constructor for vfio devices backed by iommufd with hardware-accelerated nested HWPT support - Automatically creates IommufdVIommu/IommufdVDevice when nested mode is enabled via `VfioIommufd` - Supports sharing a single `IommufdVIommu` instance across multiple VFIO devices - Returns `IommufdVDevice` handle for subsequent S1 HWPT operations - Attaches device to bypass HWPT by default (until guest enables IOMMU) 3. VfioDevice::install_s1_hwpt(): - Install guest-configured stage-1 page tables into hardware - Called when guest writes to virtual IOMMU stream table entries - Replaces existing S1 HWPT if present (the device is first moved to the abort HWPT and the old S1 HWPT is destroyed before the new one is allocated and attached) - Uses `IommufdHwptData` enum for type-safe hardware-specific configuration 4. VfioDevice::uninstall_s1_hwpt(): - Revert device to bypass or abort mode - abort=true: Use abort HWPT (fault all DMA) - abort=false: Use bypass HWPT (passthrough translation) - Called during guest IOMMU reset or shutdown Dependencies on iommufd-ioctls: This implementation builds upon three types from iommufd-ioctls [1]: - `IommufdVIommu`: Represents a physical IOMMU slice managing S2 HWPT and default S1 HWPTs (bypass/abort). Shared across devices behind the same virtual IOMMU. 
- `IommufdVDevice`: Represents a device attached to an `IommufdVIommu`. Handles dynamic S1 HWPT allocation and lifecycle management. - `IommufdHwptData`: Type-safe enum for architecture-specific HWPT configuration (SMMUv3 STE data, VT-d context entries). Integration Notes for VMMs: 1. VMM creates `VfioIommufd` with `s1_hwpt_data_type` if hardware-accelerated virtual IOMMUs are enabled and used to manage VFIO devices 2. VMM calls `VfioDevice::new_with_iommufd()` per passthrough device - Devices behind the same virtual IOMMU should reuse the same instance of `IommufdVIommu` - Each VFIO device will have its own `VfioDevice` and `IommufdVDevice` instance 3. VMM needs to make sure the virtual IOMMU is compatible with the physical IOMMU: - `IommufdVDevice::get_hw_info` is used to retrieve hardware information of the physical IOMMU 4. VMM traps guest IOMMU commands and calls: - `install_s1_hwpt()` when guest enables IOMMU - `uninstall_s1_hwpt()` when guest disables IOMMU - `IommufdVIommu::invalidate_hwpt()` when guest invalidates IOTLB entries This allows a VMM to use a hardware-accelerated IOMMU to manage VFIO devices and lets the physical IOMMU hardware process guest page tables directly. [1] cloud-hypervisor/iommufd#5 Signed-off-by: Bo Chen <bchen@crusoe.ai>
1 parent c437d21 commit c3a9795

File tree

3 files changed

+209
-3
lines changed

3 files changed

+209
-3
lines changed

vfio-ioctls/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,5 +36,5 @@ mshv-bindings = { version = "0.6.5", features = [
3636
"fam-wrappers",
3737
], optional = true }
3838
mshv-ioctls = { version = "0.6.5", optional = true }
39-
iommufd-bindings = { git = "https://github.com/cloud-hypervisor/iommufd", rev = "083c016", optional = true }
40-
iommufd-ioctls = { git = "https://github.com/cloud-hypervisor/iommufd", rev = "083c016", optional = true }
39+
iommufd-bindings = { git = "https://github.com/likebreath/iommufd", branch = "0129/rfc_viommu_vdevice", optional = true }
40+
iommufd-ioctls = { git = "https://github.com/likebreath/iommufd", branch = "0129/rfc_viommu_vdevice", optional = true }

vfio-ioctls/src/lib.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,21 @@ pub enum VfioError {
167167
#[cfg(feature = "vfio_cdev")]
168168
#[error("failed iommufd ioctl")]
169169
IommufdIoctlError(#[source] IommufdError),
170+
#[cfg(feature = "vfio_cdev")]
171+
#[error("missing virt_sid for S1 HWPT setup")]
172+
MissingVirtSid,
173+
#[cfg(feature = "vfio_cdev")]
174+
#[error("failed to create iommufd vIOMMU")]
175+
NewIommufdVIommu(#[source] IommufdError),
176+
#[cfg(feature = "vfio_cdev")]
177+
#[error("failed to create iommufd vDevice")]
178+
NewIommufdVDevice(#[source] IommufdError),
179+
#[cfg(feature = "vfio_cdev")]
180+
#[error("failed to destroy s1 hwpt")]
181+
IommufdS1HwptDestroy(#[source] IommufdError),
182+
#[cfg(feature = "vfio_cdev")]
183+
#[error("failed to allocate s1 hwpt")]
184+
IommufdS1HwptAlloc(#[source] IommufdError),
170185
}
171186

172187
/// Specialized version of `Result` for VFIO subsystem.

vfio-ioctls/src/vfio_device.rs

Lines changed: 192 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use byteorder::{ByteOrder, NativeEndian};
1717
#[cfg(feature = "vfio_cdev")]
1818
use iommufd_bindings::*;
1919
#[cfg(feature = "vfio_cdev")]
20-
use iommufd_ioctls::IommuFd;
20+
use iommufd_ioctls::{IommuFd, IommufdHwptData, IommufdVDevice, IommufdVIommu};
2121
use log::{debug, error, warn};
2222
use vfio_bindings::bindings::vfio::*;
2323
use vm_memory::{Address, GuestMemory, GuestMemoryRegion, MemoryRegionAddress};
@@ -561,6 +561,7 @@ pub struct VfioIommufd {
561561
pub(crate) iommufd: Arc<IommuFd>,
562562
pub(crate) ioas_id: u32,
563563
common: VfioCommon,
564+
s1_hwpt_data_type: Option<iommu_hwpt_data_type>,
564565
}
565566

566567
#[cfg(feature = "vfio_cdev")]
@@ -576,6 +577,7 @@ impl VfioIommufd {
576577
iommufd: Arc<IommuFd>,
577578
ioas_id: Option<u32>,
578579
device_fd: Option<VfioContainerDeviceHandle>,
580+
s1_hwpt_data_type: Option<iommu_hwpt_data_type>,
579581
) -> Result<Self> {
580582
let ioas_id = match ioas_id {
581583
Some(ioas_id) => ioas_id,
@@ -599,6 +601,7 @@ impl VfioIommufd {
599601
iommufd,
600602
ioas_id,
601603
common: VfioCommon { device_fd },
604+
s1_hwpt_data_type,
602605
};
603606

604607
Ok(vfio_iommufd)
@@ -1120,6 +1123,194 @@ impl VfioDevice {
11201123
})
11211124
}
11221125

1126+
#[cfg(feature = "vfio_cdev")]
1127+
/// Creates a new VFIO device backed by IOMMUFD.
1128+
///
1129+
/// This initializes a VFIO device with support for vIOMMU and vDevice abstractions
1130+
/// when nested hardware page tables (HWPT) are configured via the `VfioIommufd` instance.
1131+
///
1132+
/// # Arguments
1133+
///
1134+
/// * `sysfspath` - Path to the VFIO device in sysfs.
1135+
/// * Note: Future versions may support file descriptor interfaces to be more versatile. *
1136+
/// * `vfio_ops` - The VFIO operations wrapper (must be a `VfioIommufd` instance).
1137+
/// * `viommu` - An optional vIOMMU instance.
1138+
/// - If `None` and nested HWPT is enabled, a new vIOMMU instance is created and stored back into `viommu`.
1139+
/// - If `Some`, the provided instance is reused.
1140+
/// * Note: The reused vIOMMU must be associated with the same physical IOMMU as this device;
1141+
/// otherwise, vDevice creation will fail and an error will be returned.*
1142+
/// * `virt_sid` - The Virtual Stream ID. This is required if `s1_hwpt_data_type` is
1143+
/// configured in the `VfioIommufd` instance (i.e., nested HWPT is active).
1144+
///
1145+
/// # Returns
1146+
///
1147+
/// A tuple containing the initialized `VfioDevice` and an optional `IommufdVDevice`
1148+
/// (present only when nested HWPT is configured).
1149+
pub fn new_with_iommufd(
1150+
sysfspath: &Path,
1151+
vfio_ops: Arc<dyn VfioOps>,
1152+
viommu: &mut Option<Arc<IommufdVIommu>>,
1153+
virt_sid: Option<u64>,
1154+
) -> Result<(Self, Option<IommufdVDevice>)> {
1155+
let vfio_iommufd =
1156+
if let Some(vfio_iommufd) = vfio_ops.as_any().downcast_ref::<VfioIommufd>() {
1157+
vfio_iommufd
1158+
} else {
1159+
return Err(VfioError::DowncastVfioOps);
1160+
};
1161+
1162+
let (device_info, iommufd_vdevice) = {
1163+
// Open the vfio cdev file
1164+
let device = Self::get_device_cdev_from_path(sysfspath)?;
1165+
1166+
// Add the vfio cdev file to VFIO-KVM device tracking
1167+
vfio_iommufd
1168+
.common
1169+
.device_set_fd(device.as_raw_fd(), true)?;
1170+
1171+
// Bind the VFIO device to the iommufd file
1172+
let mut bind = vfio_device_bind_iommufd {
1173+
argsz: mem::size_of::<vfio_device_bind_iommufd>() as u32,
1174+
flags: 0,
1175+
iommufd: vfio_iommufd.iommufd.as_raw_fd(),
1176+
out_devid: 0,
1177+
};
1178+
vfio_syscall::bind_device_iommufd(&device, &mut bind)?;
1179+
1180+
let iommufd_vdevice = match vfio_iommufd.s1_hwpt_data_type {
1181+
// When no s1 hwpt is used, associate the vfio device to the IOAS within the bound iommufd
1182+
None => {
1183+
let mut attach_data = vfio_device_attach_iommufd_pt {
1184+
argsz: mem::size_of::<vfio_device_attach_iommufd_pt>() as u32,
1185+
flags: 0,
1186+
pt_id: vfio_iommufd.ioas_id,
1187+
};
1188+
vfio_syscall::attach_device_iommufd_pt(&device, &mut attach_data)?;
1189+
1190+
None
1191+
}
1192+
// When s1 hwpt is used, create and attach vIOMMU and vDevice for nested (s1+s2) hwpt setup
1193+
Some(s1_hwpt_data_type) => {
1194+
let virt_id = if let Some(virt_sid) = virt_sid {
1195+
virt_sid
1196+
} else {
1197+
return Err(VfioError::MissingVirtSid);
1198+
};
1199+
1200+
let viommu = if let Some(viommu) = viommu {
1201+
// Reuse the passed in vIOMMU instance if available
1202+
viommu.clone()
1203+
} else {
1204+
// Allocate an instance of vIOMMU for the vfio device if no instance is passed in
1205+
let new_viommu = IommufdVIommu::new(
1206+
vfio_iommufd.iommufd.clone(),
1207+
vfio_iommufd.ioas_id,
1208+
bind.out_devid,
1209+
s1_hwpt_data_type,
1210+
)
1211+
.map_err(VfioError::NewIommufdVIommu)?;
1212+
1213+
let viommu_arc = Arc::new(new_viommu);
1214+
*viommu = Some(viommu_arc.clone());
1215+
1216+
viommu_arc
1217+
};
1218+
1219+
// Allocate an instance of vDevice
1220+
let vdevice = IommufdVDevice::new(viommu.clone(), bind.out_devid, virt_id)
1221+
.map_err(VfioError::NewIommufdVDevice)?;
1222+
1223+
// Attach the vfio cdev device to the s1_bypass_hwpt
1224+
let mut attach_data = vfio_device_attach_iommufd_pt {
1225+
argsz: mem::size_of::<vfio_device_attach_iommufd_pt>() as u32,
1226+
flags: 0,
1227+
pt_id: viommu.bypass_hwpt_id,
1228+
};
1229+
vfio_syscall::attach_device_iommufd_pt(&device, &mut attach_data)?;
1230+
1231+
Some(vdevice)
1232+
}
1233+
};
1234+
1235+
let dev_info = VfioDeviceInfo::get_device_info(&device)?;
1236+
let dev_info = VfioDeviceInfo::new(device, &dev_info);
1237+
1238+
(dev_info, iommufd_vdevice)
1239+
};
1240+
1241+
let regions = device_info.get_regions()?;
1242+
let irqs = device_info.get_irqs()?;
1243+
1244+
Ok((
1245+
VfioDevice {
1246+
device: ManuallyDrop::new(device_info.device),
1247+
flags: device_info.flags,
1248+
regions,
1249+
irqs,
1250+
sysfspath: sysfspath.to_path_buf(),
1251+
vfio_ops,
1252+
},
1253+
iommufd_vdevice,
1254+
))
1255+
}
1256+
1257+
#[cfg(feature = "vfio_cdev")]
1258+
/// Uninstall s1 hwpt for the vfio device.
1259+
/// # Parameters
1260+
/// * `vdevice`: the `IommufdVDevice` instance associated with the vfio device.
1261+
/// * `abort`: if true, use s1 abort_hwpt; if false, use s1 bypass_hwpt.
1262+
pub fn uninstall_s1_hwpt(&self, vdevice: &mut IommufdVDevice, abort: bool) -> Result<()> {
1263+
// Attach to bypass hwpt or abort hwpt based on the 'abort' flag
1264+
let hwpt_id = if abort {
1265+
vdevice.viommu.abort_hwpt_id
1266+
} else {
1267+
vdevice.viommu.bypass_hwpt_id
1268+
};
1269+
let mut attach_data = vfio_device_attach_iommufd_pt {
1270+
argsz: mem::size_of::<vfio_device_attach_iommufd_pt>() as u32,
1271+
flags: 0,
1272+
pt_id: hwpt_id,
1273+
};
1274+
vfio_syscall::attach_device_iommufd_pt(&self.device, &mut attach_data)?;
1275+
1276+
// Destroy s1 hwpt
1277+
vdevice
1278+
.destroy_s1_hwpt()
1279+
.map_err(VfioError::IommufdS1HwptDestroy)?;
1280+
1281+
Ok(())
1282+
}
1283+
1284+
#[cfg(feature = "vfio_cdev")]
1285+
/// Install s1 hwpt for the vfio device based on the input hwpt data.
1286+
///
1287+
/// # Parameters
1288+
/// * `vdevice`: the `IommufdVDevice` instance associated with the vfio device.
1289+
/// * `hwpt_data`: the hwpt data to create s1 hwpt.
1290+
pub fn install_s1_hwpt(
1291+
&self,
1292+
vdevice: &mut IommufdVDevice,
1293+
hwpt_data: &IommufdHwptData,
1294+
) -> Result<()> {
1295+
// Uninstall the existing s1 hwpt, if any; the device is attached to the abort hwpt in the meantime
1296+
self.uninstall_s1_hwpt(vdevice, true)?;
1297+
1298+
// Create s1 hwpt based on the input data
1299+
let s1_hwpt_id = vdevice
1300+
.allocate_s1_hwpt(hwpt_data)
1301+
.map_err(VfioError::IommufdS1HwptAlloc)?;
1302+
1303+
// Attach the vfio device to the newly created s1 hwpt
1304+
let mut attach_data = vfio_device_attach_iommufd_pt {
1305+
argsz: mem::size_of::<vfio_device_attach_iommufd_pt>() as u32,
1306+
flags: 0,
1307+
pt_id: s1_hwpt_id,
1308+
};
1309+
vfio_syscall::attach_device_iommufd_pt(&self.device, &mut attach_data)?;
1310+
1311+
Ok(())
1312+
}
1313+
11231314
/// VFIO device reset only if the device supports being reset.
11241315
pub fn reset(&self) {
11251316
if self.flags & VFIO_DEVICE_FLAGS_RESET != 0 {

0 commit comments

Comments
 (0)