Skip to content

Commit b92bea9

Browse files
rleonkawasaki
authored and committed
vfio/pci: Add dma-buf export support for MMIO regions
Add support for exporting PCI device MMIO regions through dma-buf, enabling safe sharing of non-struct page memory with controlled lifetime management. This allows RDMA and other subsystems to import dma-buf FDs and build them into memory regions for PCI P2P operations. The implementation provides a revocable attachment mechanism using dma-buf move operations. MMIO regions are normally pinned as BARs don't change physical addresses, but access is revoked when the VFIO device is closed or a PCI reset is issued. This ensures kernel self-defense against potentially hostile userspace. Signed-off-by: Jason Gunthorpe <[email protected]> Signed-off-by: Vivek Kasireddy <[email protected]> Signed-off-by: Leon Romanovsky <[email protected]>
1 parent 3b22a62 commit b92bea9

File tree

9 files changed

+431
-5
lines changed

9 files changed

+431
-5
lines changed

drivers/vfio/pci/Kconfig

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,26 @@ config VFIO_PCI_ZDEV_KVM
5555

5656
To enable s390x KVM vfio-pci extensions, say Y.
5757

58+
config VFIO_PCI_DMABUF
59+
bool "VFIO PCI extensions for DMA-BUF"
60+
depends on VFIO_PCI_CORE
61+
depends on PCI_P2PDMA && DMA_SHARED_BUFFER
62+
default y
63+
help
64+
Enable support for VFIO PCI extensions that allow exporting
65+
device MMIO regions as DMA-BUFs for peer devices to access via
66+
peer-to-peer (P2P) DMA.
67+
68+
This feature enables a VFIO-managed PCI device to export a portion
69+
of its MMIO BAR as a DMA-BUF file descriptor, which can be passed
70+
to other userspace drivers or kernel subsystems capable of
71+
initiating DMA to that region.
72+
73+
Say Y here if you want to enable VFIO DMABUF-based MMIO export
74+
support for peer-to-peer DMA use cases.
75+
76+
If unsure, say N.
77+
5878
source "drivers/vfio/pci/mlx5/Kconfig"
5979

6080
source "drivers/vfio/pci/hisilicon/Kconfig"

drivers/vfio/pci/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
vfio-pci-core-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
44
vfio-pci-core-$(CONFIG_VFIO_PCI_ZDEV_KVM) += vfio_pci_zdev.o
5+
56
obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o
7+
vfio-pci-core-$(CONFIG_VFIO_PCI_DMABUF) += vfio_pci_dmabuf.o
68

79
vfio-pci-y := vfio_pci.o
810
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o

drivers/vfio/pci/vfio_pci_config.c

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -589,10 +589,12 @@ static int vfio_basic_config_write(struct vfio_pci_core_device *vdev, int pos,
589589
virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY);
590590
new_mem = !!(new_cmd & PCI_COMMAND_MEMORY);
591591

592-
if (!new_mem)
592+
if (!new_mem) {
593593
vfio_pci_zap_and_down_write_memory_lock(vdev);
594-
else
594+
vfio_pci_dma_buf_move(vdev, true);
595+
} else {
595596
down_write(&vdev->memory_lock);
597+
}
596598

597599
/*
598600
* If the user is writing mem/io enable (new_mem/io) and we
@@ -627,6 +629,8 @@ static int vfio_basic_config_write(struct vfio_pci_core_device *vdev, int pos,
627629
*virt_cmd &= cpu_to_le16(~mask);
628630
*virt_cmd |= cpu_to_le16(new_cmd & mask);
629631

632+
if (__vfio_pci_memory_enabled(vdev))
633+
vfio_pci_dma_buf_move(vdev, false);
630634
up_write(&vdev->memory_lock);
631635
}
632636

@@ -707,12 +711,16 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm)
707711
static void vfio_lock_and_set_power_state(struct vfio_pci_core_device *vdev,
708712
pci_power_t state)
709713
{
710-
if (state >= PCI_D3hot)
714+
if (state >= PCI_D3hot) {
711715
vfio_pci_zap_and_down_write_memory_lock(vdev);
712-
else
716+
vfio_pci_dma_buf_move(vdev, true);
717+
} else {
713718
down_write(&vdev->memory_lock);
719+
}
714720

715721
vfio_pci_set_power_state(vdev, state);
722+
if (__vfio_pci_memory_enabled(vdev))
723+
vfio_pci_dma_buf_move(vdev, false);
716724
up_write(&vdev->memory_lock);
717725
}
718726

@@ -900,7 +908,10 @@ static int vfio_exp_config_write(struct vfio_pci_core_device *vdev, int pos,
900908

901909
if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) {
902910
vfio_pci_zap_and_down_write_memory_lock(vdev);
911+
vfio_pci_dma_buf_move(vdev, true);
903912
pci_try_reset_function(vdev->pdev);
913+
if (__vfio_pci_memory_enabled(vdev))
914+
vfio_pci_dma_buf_move(vdev, false); /* reset finished and memory is enabled: lift the revocation, as the other reset paths do */
904915
up_write(&vdev->memory_lock);
905916
}
906917
}
@@ -982,7 +993,10 @@ static int vfio_af_config_write(struct vfio_pci_core_device *vdev, int pos,
982993

983994
if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) {
984995
vfio_pci_zap_and_down_write_memory_lock(vdev);
996+
vfio_pci_dma_buf_move(vdev, true);
985997
pci_try_reset_function(vdev->pdev);
998+
if (__vfio_pci_memory_enabled(vdev))
999+
vfio_pci_dma_buf_move(vdev, false); /* reset finished and memory is enabled: lift the revocation, as the other reset paths do */
9861000
up_write(&vdev->memory_lock);
9871001
}
9881002
}

drivers/vfio/pci/vfio_pci_core.c

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@
2929
#include <linux/nospec.h>
3030
#include <linux/sched/mm.h>
3131
#include <linux/iommufd.h>
32+
#ifdef CONFIG_VFIO_PCI_DMABUF
3233
#include <linux/pci-p2pdma.h>
34+
#endif
3335
#if IS_ENABLED(CONFIG_EEH)
3436
#include <asm/eeh.h>
3537
#endif
@@ -288,6 +290,8 @@ static int vfio_pci_runtime_pm_entry(struct vfio_pci_core_device *vdev,
288290
* semaphore.
289291
*/
290292
vfio_pci_zap_and_down_write_memory_lock(vdev);
293+
vfio_pci_dma_buf_move(vdev, true);
294+
291295
if (vdev->pm_runtime_engaged) {
292296
up_write(&vdev->memory_lock);
293297
return -EINVAL;
@@ -371,6 +375,8 @@ static void vfio_pci_runtime_pm_exit(struct vfio_pci_core_device *vdev)
371375
*/
372376
down_write(&vdev->memory_lock);
373377
__vfio_pci_runtime_pm_exit(vdev);
378+
if (__vfio_pci_memory_enabled(vdev))
379+
vfio_pci_dma_buf_move(vdev, false);
374380
up_write(&vdev->memory_lock);
375381
}
376382

@@ -691,6 +697,8 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev)
691697
#endif
692698
vfio_pci_core_disable(vdev);
693699

700+
vfio_pci_dma_buf_cleanup(vdev);
701+
694702
mutex_lock(&vdev->igate);
695703
if (vdev->err_trigger) {
696704
eventfd_ctx_put(vdev->err_trigger);
@@ -1223,7 +1231,10 @@ static int vfio_pci_ioctl_reset(struct vfio_pci_core_device *vdev,
12231231
*/
12241232
vfio_pci_set_power_state(vdev, PCI_D0);
12251233

1234+
vfio_pci_dma_buf_move(vdev, true);
12261235
ret = pci_try_reset_function(vdev->pdev);
1236+
if (__vfio_pci_memory_enabled(vdev))
1237+
vfio_pci_dma_buf_move(vdev, false);
12271238
up_write(&vdev->memory_lock);
12281239

12291240
return ret;
@@ -1512,6 +1523,8 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
15121523
return vfio_pci_core_pm_exit(vdev, flags, arg, argsz);
15131524
case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
15141525
return vfio_pci_core_feature_token(vdev, flags, arg, argsz);
1526+
case VFIO_DEVICE_FEATURE_DMA_BUF:
1527+
return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz);
15151528
default:
15161529
return -ENOTTY;
15171530
}
@@ -2088,9 +2101,13 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
20882101
INIT_LIST_HEAD(&vdev->dummy_resources_list);
20892102
INIT_LIST_HEAD(&vdev->ioeventfds_list);
20902103
INIT_LIST_HEAD(&vdev->sriov_pfs_item);
2104+
#ifdef CONFIG_VFIO_PCI_DMABUF
20912105
vdev->provider = pci_p2pdma_enable(vdev->pdev);
20922106
if (IS_ERR(vdev->provider))
20932107
return PTR_ERR(vdev->provider);
2108+
2109+
INIT_LIST_HEAD(&vdev->dmabufs);
2110+
#endif
20942111
init_rwsem(&vdev->memory_lock);
20952112
xa_init(&vdev->ctx);
20962113

@@ -2473,11 +2490,17 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
24732490
* cause the PCI config space reset without restoring the original
24742491
* state (saved locally in 'vdev->pm_save').
24752492
*/
2476-
list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list)
2493+
list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) {
2494+
vfio_pci_dma_buf_move(vdev, true);
24772495
vfio_pci_set_power_state(vdev, PCI_D0);
2496+
}
24782497

24792498
ret = pci_reset_bus(pdev);
24802499

2500+
list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list)
2501+
if (__vfio_pci_memory_enabled(vdev))
2502+
vfio_pci_dma_buf_move(vdev, false);
2503+
24812504
vdev = list_last_entry(&dev_set->device_list,
24822505
struct vfio_pci_core_device, vdev.dev_set_list);
24832506

0 commit comments

Comments
 (0)