diff --git a/resources/hiding_ci/kernel_commit_hash b/resources/hiding_ci/kernel_commit_hash index 39d6afaaf51..d5c975dba9e 100644 --- a/resources/hiding_ci/kernel_commit_hash +++ b/resources/hiding_ci/kernel_commit_hash @@ -1 +1 @@ -4701f33a10702d5fc577c32434eb62adde0a1ae1 +86731a2a651e58953fc949573895f2fa6d456841 \ No newline at end of file diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0001-mm-Consolidate-freeing-of-typed-folios-on-final-foli.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0001-mm-Consolidate-freeing-of-typed-folios-on-final-foli.patch deleted file mode 100644 index 4d4b5572d8a..00000000000 --- a/resources/hiding_ci/linux_patches/05-mmap-support/0001-mm-Consolidate-freeing-of-typed-folios-on-final-foli.patch +++ /dev/null @@ -1,109 +0,0 @@ -From f9ca710b51263ce8317cc2fa02232e456fa1f39c Mon Sep 17 00:00:00 2001 -From: Fuad Tabba -Date: Tue, 18 Mar 2025 16:18:15 +0000 -Subject: [PATCH 01/26] mm: Consolidate freeing of typed folios on final - folio_put() - -Some folio types, such as hugetlb, handle freeing their own -folios. Moreover, guest_memfd will require being notified once a -folio's reference count reaches 0 to facilitate shared to private -folio conversion, without the folio actually being freed at that -point. - -As a first step towards that, this patch consolidates freeing -folios that have a type. The first user is hugetlb folios. Later -in this patch series, guest_memfd will become the second user of -this. 
- -Suggested-by: David Hildenbrand -Acked-by: Vlastimil Babka -Acked-by: David Hildenbrand -Signed-off-by: Fuad Tabba ---- - include/linux/page-flags.h | 15 +++++++++++++++ - mm/swap.c | 23 ++++++++++++++++++----- - 2 files changed, 33 insertions(+), 5 deletions(-) - -diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h -index 36d283552f80..6dc2494bd002 100644 ---- a/include/linux/page-flags.h -+++ b/include/linux/page-flags.h -@@ -953,6 +953,21 @@ static inline bool page_has_type(const struct page *page) - return page_mapcount_is_type(data_race(page->page_type)); - } - -+static inline int page_get_type(const struct page *page) -+{ -+ return page->page_type >> 24; -+} -+ -+static inline bool folio_has_type(const struct folio *folio) -+{ -+ return page_has_type(&folio->page); -+} -+ -+static inline int folio_get_type(const struct folio *folio) -+{ -+ return page_get_type(&folio->page); -+} -+ - #define FOLIO_TYPE_OPS(lname, fname) \ - static __always_inline bool folio_test_##fname(const struct folio *folio) \ - { \ -diff --git a/mm/swap.c b/mm/swap.c -index fc8281ef4241..47bc1bb919cc 100644 ---- a/mm/swap.c -+++ b/mm/swap.c -@@ -94,6 +94,19 @@ static void page_cache_release(struct folio *folio) - unlock_page_lruvec_irqrestore(lruvec, flags); - } - -+static void free_typed_folio(struct folio *folio) -+{ -+ switch (folio_get_type(folio)) { -+#ifdef CONFIG_HUGETLBFS -+ case PGTY_hugetlb: -+ free_huge_folio(folio); -+ return; -+#endif -+ default: -+ WARN_ON_ONCE(1); -+ } -+} -+ - void __folio_put(struct folio *folio) - { - if (unlikely(folio_is_zone_device(folio))) { -@@ -101,8 +114,8 @@ void __folio_put(struct folio *folio) - return; - } - -- if (folio_test_hugetlb(folio)) { -- free_huge_folio(folio); -+ if (unlikely(folio_has_type(folio))) { -+ free_typed_folio(folio); - return; - } - -@@ -966,13 +979,13 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs) - if (!folio_ref_sub_and_test(folio, nr_refs)) - continue; - -- /* hugetlb has 
its own memcg */ -- if (folio_test_hugetlb(folio)) { -+ if (unlikely(folio_has_type(folio))) { -+ /* typed folios have their own memcg, if any */ - if (lruvec) { - unlock_page_lruvec_irqrestore(lruvec, flags); - lruvec = NULL; - } -- free_huge_folio(folio); -+ free_typed_folio(folio); - continue; - } - folio_unqueue_deferred_split(folio); - -base-commit: 4701f33a10702d5fc577c32434eb62adde0a1ae1 --- -2.47.1 - diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0002-KVM-Rename-CONFIG_KVM_PRIVATE_MEM-to-CONFIG_KVM_GMEM.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0002-KVM-Rename-CONFIG_KVM_PRIVATE_MEM-to-CONFIG_KVM_GMEM.patch new file mode 100644 index 00000000000..953a109ffbb --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0002-KVM-Rename-CONFIG_KVM_PRIVATE_MEM-to-CONFIG_KVM_GMEM.patch @@ -0,0 +1,171 @@ +From ba45bc1cd4624badfab75d73286d753403b3cfb5 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:13 +0100 +Subject: [PATCH 02/42] KVM: Rename CONFIG_KVM_PRIVATE_MEM to CONFIG_KVM_GMEM + +The option KVM_PRIVATE_MEM enables guest_memfd in general. Subsequent +patches add shared memory support to guest_memfd. Therefore, rename it +to KVM_GMEM to make its purpose clearer. 
+ +Reviewed-by: Ira Weiny +Reviewed-by: Gavin Shan +Reviewed-by: Shivank Garg +Reviewed-by: Vlastimil Babka +Co-developed-by: David Hildenbrand +Signed-off-by: David Hildenbrand +Signed-off-by: Fuad Tabba +--- + arch/x86/include/asm/kvm_host.h | 2 +- + include/linux/kvm_host.h | 10 +++++----- + virt/kvm/Kconfig | 8 ++++---- + virt/kvm/Makefile.kvm | 2 +- + virt/kvm/kvm_main.c | 4 ++-- + virt/kvm/kvm_mm.h | 4 ++-- + 6 files changed, 15 insertions(+), 15 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index b4a391929cdb..6e0bbf4c2202 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -2269,7 +2269,7 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, + int tdp_max_root_level, int tdp_huge_page_level); + + +-#ifdef CONFIG_KVM_PRIVATE_MEM ++#ifdef CONFIG_KVM_GMEM + #define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) + #else + #define kvm_arch_has_private_mem(kvm) false +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 3bde4fb5c6aa..b2c415e81e2e 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -601,7 +601,7 @@ struct kvm_memory_slot { + short id; + u16 as_id; + +-#ifdef CONFIG_KVM_PRIVATE_MEM ++#ifdef CONFIG_KVM_GMEM + struct { + /* + * Writes protected by kvm->slots_lock. Acquiring a +@@ -722,7 +722,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) + * Arch code must define kvm_arch_has_private_mem if support for private memory + * is enabled. 
+ */ +-#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) ++#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_GMEM) + static inline bool kvm_arch_has_private_mem(struct kvm *kvm) + { + return false; +@@ -2527,7 +2527,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, + + static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) + { +- return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) && ++ return IS_ENABLED(CONFIG_KVM_GMEM) && + kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; + } + #else +@@ -2537,7 +2537,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) + } + #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ + +-#ifdef CONFIG_KVM_PRIVATE_MEM ++#ifdef CONFIG_KVM_GMEM + int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn, kvm_pfn_t *pfn, struct page **page, + int *max_order); +@@ -2550,7 +2550,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, + KVM_BUG_ON(1, kvm); + return -EIO; + } +-#endif /* CONFIG_KVM_PRIVATE_MEM */ ++#endif /* CONFIG_KVM_GMEM */ + + #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE + int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); +diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig +index 727b542074e7..49df4e32bff7 100644 +--- a/virt/kvm/Kconfig ++++ b/virt/kvm/Kconfig +@@ -112,19 +112,19 @@ config KVM_GENERIC_MEMORY_ATTRIBUTES + depends on KVM_GENERIC_MMU_NOTIFIER + bool + +-config KVM_PRIVATE_MEM ++config KVM_GMEM + select XARRAY_MULTI + bool + + config KVM_GENERIC_PRIVATE_MEM + select KVM_GENERIC_MEMORY_ATTRIBUTES +- select KVM_PRIVATE_MEM ++ select KVM_GMEM + bool + + config HAVE_KVM_ARCH_GMEM_PREPARE + bool +- depends on KVM_PRIVATE_MEM ++ depends on KVM_GMEM + + config HAVE_KVM_ARCH_GMEM_INVALIDATE + bool +- depends on KVM_PRIVATE_MEM ++ depends on KVM_GMEM +diff --git a/virt/kvm/Makefile.kvm b/virt/kvm/Makefile.kvm +index 724c89af78af..8d00918d4c8b 100644 +--- a/virt/kvm/Makefile.kvm 
++++ b/virt/kvm/Makefile.kvm +@@ -12,4 +12,4 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o + kvm-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o + kvm-$(CONFIG_HAVE_KVM_DIRTY_RING) += $(KVM)/dirty_ring.o + kvm-$(CONFIG_HAVE_KVM_PFNCACHE) += $(KVM)/pfncache.o +-kvm-$(CONFIG_KVM_PRIVATE_MEM) += $(KVM)/guest_memfd.o ++kvm-$(CONFIG_KVM_GMEM) += $(KVM)/guest_memfd.o +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index eec82775c5bf..898c3d5a7ba8 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -4910,7 +4910,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) + case KVM_CAP_MEMORY_ATTRIBUTES: + return kvm_supported_mem_attributes(kvm); + #endif +-#ifdef CONFIG_KVM_PRIVATE_MEM ++#ifdef CONFIG_KVM_GMEM + case KVM_CAP_GUEST_MEMFD: + return !kvm || kvm_arch_has_private_mem(kvm); + #endif +@@ -5344,7 +5344,7 @@ static long kvm_vm_ioctl(struct file *filp, + case KVM_GET_STATS_FD: + r = kvm_vm_ioctl_get_stats_fd(kvm); + break; +-#ifdef CONFIG_KVM_PRIVATE_MEM ++#ifdef CONFIG_KVM_GMEM + case KVM_CREATE_GUEST_MEMFD: { + struct kvm_create_guest_memfd guest_memfd; + +diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h +index acef3f5c582a..ec311c0d6718 100644 +--- a/virt/kvm/kvm_mm.h ++++ b/virt/kvm/kvm_mm.h +@@ -67,7 +67,7 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, + } + #endif /* HAVE_KVM_PFNCACHE */ + +-#ifdef CONFIG_KVM_PRIVATE_MEM ++#ifdef CONFIG_KVM_GMEM + void kvm_gmem_init(struct module *module); + int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args); + int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, +@@ -91,6 +91,6 @@ static inline void kvm_gmem_unbind(struct kvm_memory_slot *slot) + { + WARN_ON_ONCE(1); + } +-#endif /* CONFIG_KVM_PRIVATE_MEM */ ++#endif /* CONFIG_KVM_GMEM */ + + #endif /* __KVM_MM_H__ */ +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0002-KVM-guest_memfd-Handle-final-folio_put-of-guest_memf.patch 
b/resources/hiding_ci/linux_patches/05-mmap-support/0002-KVM-guest_memfd-Handle-final-folio_put-of-guest_memf.patch deleted file mode 100644 index d5778165add..00000000000 --- a/resources/hiding_ci/linux_patches/05-mmap-support/0002-KVM-guest_memfd-Handle-final-folio_put-of-guest_memf.patch +++ /dev/null @@ -1,182 +0,0 @@ -From 9a4d7cd855d14e1522f363e3e04ebb9fa0a90ff0 Mon Sep 17 00:00:00 2001 -From: Fuad Tabba -Date: Tue, 18 Mar 2025 16:18:16 +0000 -Subject: [PATCH 02/26] KVM: guest_memfd: Handle final folio_put() of - guest_memfd pages - -Before transitioning a guest_memfd folio to unshared, thereby -disallowing access by the host and allowing the hypervisor to -transition its view of the guest page as private, we need to be -sure that the host doesn't have any references to the folio. - -This patch introduces a new type for guest_memfd folios, which -isn't activated in this series but is here as a placeholder and -to facilitate the code in the subsequent patch series. This will -be used in the future to register a callback that informs the -guest_memfd subsystem when the last reference is dropped, -therefore knowing that the host doesn't have any remaining -references. - -This patch also introduces the configuration option, -KVM_GMEM_SHARED_MEM, which toggles support for mapping -guest_memfd shared memory at the host. 
- -Signed-off-by: Fuad Tabba -Acked-by: Vlastimil Babka -Acked-by: David Hildenbrand ---- - include/linux/kvm_host.h | 4 ++++ - include/linux/page-flags.h | 16 ++++++++++++++++ - mm/debug.c | 1 + - mm/swap.c | 29 +++++++++++++++++++++++++++++ - virt/kvm/Kconfig | 4 ++++ - virt/kvm/guest_memfd.c | 8 ++++++++ - 6 files changed, 62 insertions(+) - -diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h -index f34f4cfaa513..3ad0719bfc4f 100644 ---- a/include/linux/kvm_host.h -+++ b/include/linux/kvm_host.h -@@ -2571,4 +2571,8 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, - struct kvm_pre_fault_memory *range); - #endif - -+#ifdef CONFIG_KVM_GMEM_SHARED_MEM -+void kvm_gmem_handle_folio_put(struct folio *folio); -+#endif -+ - #endif -diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h -index 6dc2494bd002..daeee9a38e4c 100644 ---- a/include/linux/page-flags.h -+++ b/include/linux/page-flags.h -@@ -933,6 +933,7 @@ enum pagetype { - PGTY_slab = 0xf5, - PGTY_zsmalloc = 0xf6, - PGTY_unaccepted = 0xf7, -+ PGTY_guestmem = 0xf8, - - PGTY_mapcount_underflow = 0xff - }; -@@ -1082,6 +1083,21 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb) - FOLIO_TEST_FLAG_FALSE(hugetlb) - #endif - -+/* -+ * guestmem folios are used to back VM memory as managed by guest_memfd. Once -+ * the last reference is put, instead of freeing these folios back to the page -+ * allocator, they are returned to guest_memfd. -+ * -+ * For now, guestmem will only be set on these folios as long as they cannot be -+ * mapped to user space ("private state"), with the plan of always setting that -+ * type once typed folios can be mapped to user space cleanly. 
-+ */ -+#ifdef CONFIG_KVM_GMEM_SHARED_MEM -+FOLIO_TYPE_OPS(guestmem, guestmem) -+#else -+FOLIO_TEST_FLAG_FALSE(guestmem) -+#endif -+ - PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) - - /* -diff --git a/mm/debug.c b/mm/debug.c -index 8d2acf432385..08bc42c6cba8 100644 ---- a/mm/debug.c -+++ b/mm/debug.c -@@ -56,6 +56,7 @@ static const char *page_type_names[] = { - DEF_PAGETYPE_NAME(table), - DEF_PAGETYPE_NAME(buddy), - DEF_PAGETYPE_NAME(unaccepted), -+ DEF_PAGETYPE_NAME(guestmem), - }; - - static const char *page_type_name(unsigned int page_type) -diff --git a/mm/swap.c b/mm/swap.c -index 47bc1bb919cc..d8fda3948684 100644 ---- a/mm/swap.c -+++ b/mm/swap.c -@@ -38,6 +38,10 @@ - #include - #include - -+#ifdef CONFIG_KVM_GMEM_SHARED_MEM -+#include -+#endif -+ - #include "internal.h" - - #define CREATE_TRACE_POINTS -@@ -94,6 +98,26 @@ static void page_cache_release(struct folio *folio) - unlock_page_lruvec_irqrestore(lruvec, flags); - } - -+#ifdef CONFIG_KVM_GMEM_SHARED_MEM -+static void gmem_folio_put(struct folio *folio) -+{ -+ /* -+ * Perform the callback only as long as the KVM module is still loaded. -+ * As long as the folio mapping is set, the folio is associated with a -+ * guest_memfd inode. -+ */ -+ if (folio->mapping) -+ kvm_gmem_handle_folio_put(folio); -+ -+ /* -+ * If there are no references to the folio left, it's not associated -+ * with a guest_memfd inode anymore. 
-+ */ -+ if (folio_ref_count(folio) == 0) -+ __folio_put(folio); -+} -+#endif /* CONFIG_KVM_GMEM_SHARED_MEM */ -+ - static void free_typed_folio(struct folio *folio) - { - switch (folio_get_type(folio)) { -@@ -101,6 +125,11 @@ static void free_typed_folio(struct folio *folio) - case PGTY_hugetlb: - free_huge_folio(folio); - return; -+#endif -+#ifdef CONFIG_KVM_GMEM_SHARED_MEM -+ case PGTY_guestmem: -+ gmem_folio_put(folio); -+ return; - #endif - default: - WARN_ON_ONCE(1); -diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig -index 54e959e7d68f..4e759e8020c5 100644 ---- a/virt/kvm/Kconfig -+++ b/virt/kvm/Kconfig -@@ -124,3 +124,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE - config HAVE_KVM_ARCH_GMEM_INVALIDATE - bool - depends on KVM_PRIVATE_MEM -+ -+config KVM_GMEM_SHARED_MEM -+ select KVM_PRIVATE_MEM -+ bool -diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c -index b2aa6bf24d3a..5fc414becae5 100644 ---- a/virt/kvm/guest_memfd.c -+++ b/virt/kvm/guest_memfd.c -@@ -13,6 +13,14 @@ struct kvm_gmem { - struct list_head entry; - }; - -+#ifdef CONFIG_KVM_GMEM_SHARED_MEM -+void kvm_gmem_handle_folio_put(struct folio *folio) -+{ -+ WARN_ONCE(1, "A placeholder that shouldn't trigger. Work in progress."); -+} -+EXPORT_SYMBOL_GPL(kvm_gmem_handle_folio_put); -+#endif /* CONFIG_KVM_GMEM_SHARED_MEM */ -+ - /** - * folio_file_pfn - like folio_file_page, but return a pfn. - * @folio: The folio which contains this index. 
--- -2.47.1 - diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0003-KVM-Rename-CONFIG_KVM_GENERIC_PRIVATE_MEM-to-CONFIG_.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0003-KVM-Rename-CONFIG_KVM_GENERIC_PRIVATE_MEM-to-CONFIG_.patch new file mode 100644 index 00000000000..4f3e49ef2bf --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0003-KVM-Rename-CONFIG_KVM_GENERIC_PRIVATE_MEM-to-CONFIG_.patch @@ -0,0 +1,88 @@ +From 3d9f3ec523f188f416761e52a5c47f7a7b457ad0 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:14 +0100 +Subject: [PATCH 03/42] KVM: Rename CONFIG_KVM_GENERIC_PRIVATE_MEM to + CONFIG_KVM_GENERIC_GMEM_POPULATE + +The option KVM_GENERIC_PRIVATE_MEM enables populating a GPA range with +guest data. Rename it to KVM_GENERIC_GMEM_POPULATE to make its purpose +clearer. + +Reviewed-by: Ira Weiny +Reviewed-by: Gavin Shan +Reviewed-by: Shivank Garg +Reviewed-by: Vlastimil Babka +Co-developed-by: David Hildenbrand +Signed-off-by: David Hildenbrand +Signed-off-by: Fuad Tabba +--- + arch/x86/kvm/Kconfig | 4 ++-- + include/linux/kvm_host.h | 2 +- + virt/kvm/Kconfig | 2 +- + virt/kvm/guest_memfd.c | 2 +- + 4 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig +index 2eeffcec5382..9151cd82adab 100644 +--- a/arch/x86/kvm/Kconfig ++++ b/arch/x86/kvm/Kconfig +@@ -46,7 +46,7 @@ config KVM_X86 + select HAVE_KVM_PM_NOTIFIER if PM + select KVM_GENERIC_HARDWARE_ENABLING + select KVM_GENERIC_PRE_FAULT_MEMORY +- select KVM_GENERIC_PRIVATE_MEM if KVM_SW_PROTECTED_VM ++ select KVM_GENERIC_GMEM_POPULATE if KVM_SW_PROTECTED_VM + select KVM_WERROR if WERROR + + config KVM +@@ -157,7 +157,7 @@ config KVM_AMD_SEV + depends on KVM_AMD && X86_64 + depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) + select ARCH_HAS_CC_PLATFORM +- select KVM_GENERIC_PRIVATE_MEM ++ select KVM_GENERIC_GMEM_POPULATE + select HAVE_KVM_ARCH_GMEM_PREPARE + select 
HAVE_KVM_ARCH_GMEM_INVALIDATE + help +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index b2c415e81e2e..7700efc06e35 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -2556,7 +2556,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, + int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); + #endif + +-#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM ++#ifdef CONFIG_KVM_GENERIC_GMEM_POPULATE + /** + * kvm_gmem_populate() - Populate/prepare a GPA range with guest data + * +diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig +index 49df4e32bff7..559c93ad90be 100644 +--- a/virt/kvm/Kconfig ++++ b/virt/kvm/Kconfig +@@ -116,7 +116,7 @@ config KVM_GMEM + select XARRAY_MULTI + bool + +-config KVM_GENERIC_PRIVATE_MEM ++config KVM_GENERIC_GMEM_POPULATE + select KVM_GENERIC_MEMORY_ATTRIBUTES + select KVM_GMEM + bool +diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c +index b2aa6bf24d3a..befea51bbc75 100644 +--- a/virt/kvm/guest_memfd.c ++++ b/virt/kvm/guest_memfd.c +@@ -638,7 +638,7 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, + } + EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); + +-#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM ++#ifdef CONFIG_KVM_GENERIC_GMEM_POPULATE + long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, + kvm_gmem_populate_cb post_populate, void *opaque) + { +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0003-KVM-guest_memfd-Allow-host-to-map-guest_memfd-pages.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0003-KVM-guest_memfd-Allow-host-to-map-guest_memfd-pages.patch deleted file mode 100644 index 13d7180fa19..00000000000 --- a/resources/hiding_ci/linux_patches/05-mmap-support/0003-KVM-guest_memfd-Allow-host-to-map-guest_memfd-pages.patch +++ /dev/null @@ -1,193 +0,0 @@ -From fd39febef2e0d41394e51f5e34f2c8de80b3b4dc Mon Sep 17 00:00:00 2001 -From: Fuad Tabba -Date: Tue, 18 Mar 2025 16:18:17 +0000 
-Subject: [PATCH 03/26] KVM: guest_memfd: Allow host to map guest_memfd() pages - -Add support for mmap() and fault() for guest_memfd backed memory -in the host for VMs that support in-place conversion between -shared and private. To that end, this patch adds the ability to -check whether the VM type supports in-place conversion, and only -allows mapping its memory if that's the case. - -Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which -indicates that the VM supports shared memory in guest_memfd, or -that the host can create VMs that support shared memory. -Supporting shared memory implies that memory can be mapped when -shared with the host. - -This is controlled by the KVM_GMEM_SHARED_MEM configuration -option. - -Signed-off-by: Fuad Tabba ---- - include/linux/kvm_host.h | 11 +++++ - include/uapi/linux/kvm.h | 1 + - virt/kvm/guest_memfd.c | 101 +++++++++++++++++++++++++++++++++++++++ - virt/kvm/kvm_main.c | 4 ++ - 4 files changed, 117 insertions(+) - -diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h -index 3ad0719bfc4f..601bbcaa5e41 100644 ---- a/include/linux/kvm_host.h -+++ b/include/linux/kvm_host.h -@@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) - } - #endif - -+/* -+ * Arch code must define kvm_arch_gmem_supports_shared_mem if support for -+ * private memory is enabled and it supports in-place shared/private conversion. 
-+ */ -+#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM) -+static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm) -+{ -+ return false; -+} -+#endif -+ - #ifndef kvm_arch_has_readonly_mem - static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) - { -diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h -index 45e6d8fca9b9..117937a895da 100644 ---- a/include/uapi/linux/kvm.h -+++ b/include/uapi/linux/kvm.h -@@ -929,6 +929,7 @@ struct kvm_enable_cap { - #define KVM_CAP_PRE_FAULT_MEMORY 236 - #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 - #define KVM_CAP_X86_GUEST_MODE 238 -+#define KVM_CAP_GMEM_SHARED_MEM 239 - - struct kvm_irq_routing_irqchip { - __u32 irqchip; -diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c -index 5fc414becae5..fbf89e643add 100644 ---- a/virt/kvm/guest_memfd.c -+++ b/virt/kvm/guest_memfd.c -@@ -320,7 +320,108 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) - return gfn - slot->base_gfn + slot->gmem.pgoff; - } - -+#ifdef CONFIG_KVM_GMEM_SHARED_MEM -+static bool kvm_gmem_offset_is_shared(struct file *file, pgoff_t index) -+{ -+ struct kvm_gmem *gmem = file->private_data; -+ -+ -+ /* For now, VMs that support shared memory share all their memory. 
*/ -+ return kvm_arch_gmem_supports_shared_mem(gmem->kvm); -+} -+ -+static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf) -+{ -+ struct inode *inode = file_inode(vmf->vma->vm_file); -+ struct folio *folio; -+ vm_fault_t ret = VM_FAULT_LOCKED; -+ -+ filemap_invalidate_lock_shared(inode->i_mapping); -+ -+ folio = kvm_gmem_get_folio(inode, vmf->pgoff); -+ if (IS_ERR(folio)) { -+ int err = PTR_ERR(folio); -+ -+ if (err == -EAGAIN) -+ ret = VM_FAULT_RETRY; -+ else -+ ret = vmf_error(err); -+ -+ goto out_filemap; -+ } -+ -+ if (folio_test_hwpoison(folio)) { -+ ret = VM_FAULT_HWPOISON; -+ goto out_folio; -+ } -+ -+ if (!kvm_gmem_offset_is_shared(vmf->vma->vm_file, vmf->pgoff)) { -+ ret = VM_FAULT_SIGBUS; -+ goto out_folio; -+ } -+ -+ /* -+ * Shared folios would not be marked as "guestmem" so far, and we only -+ * expect shared folios at this point. -+ */ -+ if (WARN_ON_ONCE(folio_test_guestmem(folio))) { -+ ret = VM_FAULT_SIGBUS; -+ goto out_folio; -+ } -+ -+ /* No support for huge pages. */ -+ if (WARN_ON_ONCE(folio_test_large(folio))) { -+ ret = VM_FAULT_SIGBUS; -+ goto out_folio; -+ } -+ -+ if (!folio_test_uptodate(folio)) { -+ clear_highpage(folio_page(folio, 0)); -+ kvm_gmem_mark_prepared(folio); -+ } -+ -+ vmf->page = folio_file_page(folio, vmf->pgoff); -+ -+out_folio: -+ if (ret != VM_FAULT_LOCKED) { -+ folio_unlock(folio); -+ folio_put(folio); -+ } -+ -+out_filemap: -+ filemap_invalidate_unlock_shared(inode->i_mapping); -+ -+ return ret; -+} -+ -+static const struct vm_operations_struct kvm_gmem_vm_ops = { -+ .fault = kvm_gmem_fault, -+}; -+ -+static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ struct kvm_gmem *gmem = file->private_data; -+ -+ if (!kvm_arch_gmem_supports_shared_mem(gmem->kvm)) -+ return -ENODEV; -+ -+ if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) != -+ (VM_SHARED | VM_MAYSHARE)) { -+ return -EINVAL; -+ } -+ -+ file_accessed(file); -+ vm_flags_set(vma, VM_DONTDUMP); -+ vma->vm_ops = &kvm_gmem_vm_ops; -+ -+ return 0; 
-+} -+#else -+#define kvm_gmem_mmap NULL -+#endif /* CONFIG_KVM_GMEM_SHARED_MEM */ -+ - static struct file_operations kvm_gmem_fops = { -+ .mmap = kvm_gmem_mmap, - .open = generic_file_open, - .release = kvm_gmem_release, - .fallocate = kvm_gmem_fallocate, -diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index ba0327e2d0d3..38f0f402ea46 100644 ---- a/virt/kvm/kvm_main.c -+++ b/virt/kvm/kvm_main.c -@@ -4830,6 +4830,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) - #ifdef CONFIG_KVM_PRIVATE_MEM - case KVM_CAP_GUEST_MEMFD: - return !kvm || kvm_arch_has_private_mem(kvm); -+#endif -+#ifdef CONFIG_KVM_GMEM_SHARED_MEM -+ case KVM_CAP_GMEM_SHARED_MEM: -+ return !kvm || kvm_arch_gmem_supports_shared_mem(kvm); - #endif - default: - break; --- -2.47.1 - diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0004-KVM-Rename-kvm_arch_has_private_mem-to-kvm_arch_supp.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0004-KVM-Rename-kvm_arch_has_private_mem-to-kvm_arch_supp.patch new file mode 100644 index 00000000000..f055a1658ce --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0004-KVM-Rename-kvm_arch_has_private_mem-to-kvm_arch_supp.patch @@ -0,0 +1,147 @@ +From e5c3c69a2795226cfd06a78acae8934de610eb21 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:15 +0100 +Subject: [PATCH 04/42] KVM: Rename kvm_arch_has_private_mem() to + kvm_arch_supports_gmem() + +The function kvm_arch_has_private_mem() indicates whether an architecture +supports guest_memfd. Until now, this support implied the memory was +strictly private. + +To decouple guest_memfd support from memory privacy, rename this +function to kvm_arch_supports_gmem(). 
+ +Reviewed-by: Ira Weiny +Reviewed-by: Gavin Shan +Reviewed-by: Shivank Garg +Reviewed-by: Vlastimil Babka +Co-developed-by: David Hildenbrand +Signed-off-by: David Hildenbrand +Signed-off-by: Fuad Tabba +--- + arch/x86/include/asm/kvm_host.h | 8 ++++---- + arch/x86/kvm/mmu/mmu.c | 8 ++++---- + include/linux/kvm_host.h | 6 +++--- + virt/kvm/kvm_main.c | 6 +++--- + 4 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 6e0bbf4c2202..3d69da6d2d9e 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -2270,9 +2270,9 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, + + + #ifdef CONFIG_KVM_GMEM +-#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) ++#define kvm_arch_supports_gmem(kvm) ((kvm)->arch.has_private_mem) + #else +-#define kvm_arch_has_private_mem(kvm) false ++#define kvm_arch_supports_gmem(kvm) false + #endif + + #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) +@@ -2325,8 +2325,8 @@ enum { + #define HF_SMM_INSIDE_NMI_MASK (1 << 2) + + # define KVM_MAX_NR_ADDRESS_SPACES 2 +-/* SMM is currently unsupported for guests with private memory. */ +-# define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_has_private_mem(kvm) ? 1 : 2) ++/* SMM is currently unsupported for guests with guest_memfd (esp private) memory. */ ++# define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_supports_gmem(kvm) ? 1 : 2) + # define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0) + # define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) + #else +diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c +index 4e06e2e89a8f..b13f76630b1a 100644 +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -4915,7 +4915,7 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, + return r; + + direct_bits = 0; +- if (kvm_arch_has_private_mem(vcpu->kvm) && ++ if (kvm_arch_supports_gmem(vcpu->kvm) && + kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa))) + error_code |= PFERR_PRIVATE_ACCESS; + else +@@ -7714,7 +7714,7 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, + * Zapping SPTEs in this case ensures KVM will reassess whether or not + * a hugepage can be used for affected ranges. + */ +- if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) ++ if (WARN_ON_ONCE(!kvm_arch_supports_gmem(kvm))) + return false; + + if (WARN_ON_ONCE(range->end <= range->start)) +@@ -7793,7 +7793,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, + * a range that has PRIVATE GFNs, and conversely converting a range to + * SHARED may now allow hugepages. + */ +- if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) ++ if (WARN_ON_ONCE(!kvm_arch_supports_gmem(kvm))) + return false; + + /* +@@ -7849,7 +7849,7 @@ void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm, + { + int level; + +- if (!kvm_arch_has_private_mem(kvm)) ++ if (!kvm_arch_supports_gmem(kvm)) + return; + + for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 7700efc06e35..a0e661aa3f8a 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -719,11 +719,11 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) + #endif + + /* +- * Arch code must define kvm_arch_has_private_mem if support for private memory ++ * Arch code must define kvm_arch_supports_gmem if support for guest_memfd + * is enabled. 
+ */ +-#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_GMEM) +-static inline bool kvm_arch_has_private_mem(struct kvm *kvm) ++#if !defined(kvm_arch_supports_gmem) && !IS_ENABLED(CONFIG_KVM_GMEM) ++static inline bool kvm_arch_supports_gmem(struct kvm *kvm) + { + return false; + } +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 898c3d5a7ba8..6efbea208fa6 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1588,7 +1588,7 @@ static int check_memory_region_flags(struct kvm *kvm, + { + u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; + +- if (kvm_arch_has_private_mem(kvm)) ++ if (kvm_arch_supports_gmem(kvm)) + valid_flags |= KVM_MEM_GUEST_MEMFD; + + /* Dirty logging private memory is not currently supported. */ +@@ -2419,7 +2419,7 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, + #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES + static u64 kvm_supported_mem_attributes(struct kvm *kvm) + { +- if (!kvm || kvm_arch_has_private_mem(kvm)) ++ if (!kvm || kvm_arch_supports_gmem(kvm)) + return KVM_MEMORY_ATTRIBUTE_PRIVATE; + + return 0; +@@ -4912,7 +4912,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) + #endif + #ifdef CONFIG_KVM_GMEM + case KVM_CAP_GUEST_MEMFD: +- return !kvm || kvm_arch_has_private_mem(kvm); ++ return !kvm || kvm_arch_supports_gmem(kvm); + #endif + default: + break; +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0004-KVM-x86-Mark-KVM_X86_SW_PROTECTED_VM-as-supporting-g.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0004-KVM-x86-Mark-KVM_X86_SW_PROTECTED_VM-as-supporting-g.patch deleted file mode 100644 index 2d32a4cefc2..00000000000 --- a/resources/hiding_ci/linux_patches/05-mmap-support/0004-KVM-x86-Mark-KVM_X86_SW_PROTECTED_VM-as-supporting-g.patch +++ /dev/null @@ -1,58 +0,0 @@ -From d16c343f0f95ecd8d2cda2dfba4ac8b7c293f217 Mon Sep 17 00:00:00 2001 -From: Fuad Tabba -Date: Tue, 18 Mar 2025 16:18:19 +0000 -Subject: [PATCH 04/26] KVM: x86: 
Mark KVM_X86_SW_PROTECTED_VM as supporting - guest_memfd shared memory - -The KVM_X86_SW_PROTECTED_VM type is meant for experimentation and -does not have any underlying support for protected guests. This -makes it a good candidate for testing mapping shared memory. -Therefore, when the kconfig option is enabled, mark -KVM_X86_SW_PROTECTED_VM as supporting shared memory. - -This means that this memory is considered by guest_memfd to be -shared with the host, with the possibility of in-place conversion -between shared and private. This allows the host to map and fault -in guest_memfd memory belonging to this VM type. - -Signed-off-by: Fuad Tabba ---- - arch/x86/include/asm/kvm_host.h | 5 +++++ - arch/x86/kvm/Kconfig | 3 ++- - 2 files changed, 7 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h -index 32ae3aa50c7e..b874e54a5ee4 100644 ---- a/arch/x86/include/asm/kvm_host.h -+++ b/arch/x86/include/asm/kvm_host.h -@@ -2246,8 +2246,13 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, - - #ifdef CONFIG_KVM_PRIVATE_MEM - #define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) -+ -+#define kvm_arch_gmem_supports_shared_mem(kvm) \ -+ (IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM) && \ -+ ((kvm)->arch.vm_type == KVM_X86_SW_PROTECTED_VM)) - #else - #define kvm_arch_has_private_mem(kvm) false -+#define kvm_arch_gmem_supports_shared_mem(kvm) false - #endif - - #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) -diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig -index ea2c4f21c1ca..22d1bcdaad58 100644 ---- a/arch/x86/kvm/Kconfig -+++ b/arch/x86/kvm/Kconfig -@@ -45,7 +45,8 @@ config KVM_X86 - select HAVE_KVM_PM_NOTIFIER if PM - select KVM_GENERIC_HARDWARE_ENABLING - select KVM_GENERIC_PRE_FAULT_MEMORY -- select KVM_GENERIC_PRIVATE_MEM if KVM_SW_PROTECTED_VM -+ select KVM_PRIVATE_MEM if KVM_SW_PROTECTED_VM -+ select KVM_GMEM_SHARED_MEM if KVM_SW_PROTECTED_VM - select 
KVM_WERROR if WERROR - - config KVM --- -2.47.1 - diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0005-KVM-arm64-Refactor-user_mem_abort-calculation-of-for.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0005-KVM-arm64-Refactor-user_mem_abort-calculation-of-for.patch deleted file mode 100644 index 905c88558d8..00000000000 --- a/resources/hiding_ci/linux_patches/05-mmap-support/0005-KVM-arm64-Refactor-user_mem_abort-calculation-of-for.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 483ccb70335cb0c76161caf76c0ccb7c618038e2 Mon Sep 17 00:00:00 2001 -From: Fuad Tabba -Date: Tue, 18 Mar 2025 16:18:20 +0000 -Subject: [PATCH 05/26] KVM: arm64: Refactor user_mem_abort() calculation of - force_pte - -To simplify the code and to make the assumptions clearer, -refactor user_mem_abort() by immediately setting force_pte to -true if the conditions are met. Also, remove the comment about -logging_active being guaranteed to never be true for VM_PFNMAP -memslots, since it's not technically correct right now. - -No functional change intended. 
- -Signed-off-by: Fuad Tabba ---- - arch/arm64/kvm/mmu.c | 13 ++++--------- - 1 file changed, 4 insertions(+), 9 deletions(-) - -diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c -index 1f55b0c7b11d..887ffa1f5b14 100644 ---- a/arch/arm64/kvm/mmu.c -+++ b/arch/arm64/kvm/mmu.c -@@ -1460,7 +1460,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - bool fault_is_perm) - { - int ret = 0; -- bool write_fault, writable, force_pte = false; -+ bool write_fault, writable; - bool exec_fault, mte_allowed; - bool device = false, vfio_allow_any_uc = false; - unsigned long mmu_seq; -@@ -1472,6 +1472,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - gfn_t gfn; - kvm_pfn_t pfn; - bool logging_active = memslot_is_logging(memslot); -+ bool force_pte = logging_active || is_protected_kvm_enabled(); - long vma_pagesize, fault_granule; - enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; - struct kvm_pgtable *pgt; -@@ -1521,16 +1522,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - return -EFAULT; - } - -- /* -- * logging_active is guaranteed to never be true for VM_PFNMAP -- * memslots. 
-- */ -- if (logging_active || is_protected_kvm_enabled()) { -- force_pte = true; -+ if (force_pte) - vma_shift = PAGE_SHIFT; -- } else { -+ else - vma_shift = get_vma_page_shift(vma, hva); -- } - - switch (vma_shift) { - #ifndef __PAGETABLE_PMD_FOLDED --- -2.47.1 - diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0005-KVM-x86-Rename-kvm-arch.has_private_mem-to-kvm-arch..patch b/resources/hiding_ci/linux_patches/05-mmap-support/0005-KVM-x86-Rename-kvm-arch.has_private_mem-to-kvm-arch..patch new file mode 100644 index 00000000000..d5088b8cbc3 --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0005-KVM-x86-Rename-kvm-arch.has_private_mem-to-kvm-arch..patch @@ -0,0 +1,91 @@ +From 6724239b0af354b909d224394ba639d1a9d66d09 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:16 +0100 +Subject: [PATCH 05/42] KVM: x86: Rename kvm->arch.has_private_mem to + kvm->arch.supports_gmem + +The bool has_private_mem is used to indicate whether guest_memfd is +supported. Rename it to supports_gmem to make its meaning clearer and to +decouple memory being private from guest_memfd. 
+ +Reviewed-by: Ira Weiny +Reviewed-by: Gavin Shan +Reviewed-by: Shivank Garg +Reviewed-by: Vlastimil Babka +Co-developed-by: David Hildenbrand +Signed-off-by: David Hildenbrand +Signed-off-by: Fuad Tabba +--- + arch/x86/include/asm/kvm_host.h | 4 ++-- + arch/x86/kvm/mmu/mmu.c | 2 +- + arch/x86/kvm/svm/svm.c | 4 ++-- + arch/x86/kvm/x86.c | 3 +-- + 4 files changed, 6 insertions(+), 7 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 3d69da6d2d9e..4bc50c1e21bd 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1341,7 +1341,7 @@ struct kvm_arch { + unsigned int indirect_shadow_pages; + u8 mmu_valid_gen; + u8 vm_type; +- bool has_private_mem; ++ bool supports_gmem; + bool has_protected_state; + bool pre_fault_allowed; + struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; +@@ -2270,7 +2270,7 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, + + + #ifdef CONFIG_KVM_GMEM +-#define kvm_arch_supports_gmem(kvm) ((kvm)->arch.has_private_mem) ++#define kvm_arch_supports_gmem(kvm) ((kvm)->arch.supports_gmem) + #else + #define kvm_arch_supports_gmem(kvm) false + #endif +diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c +index b13f76630b1a..72ed7344cbf4 100644 +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -3488,7 +3488,7 @@ static bool page_fault_can_be_fast(struct kvm *kvm, struct kvm_page_fault *fault + * on RET_PF_SPURIOUS until the update completes, or an actual spurious + * case might go down the slow path. Either case will resolve itself. 
+ */ +- if (kvm->arch.has_private_mem && ++ if (kvm->arch.supports_gmem && + fault->is_private != kvm_mem_is_private(kvm, fault->gfn)) + return false; + +diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c +index ab9b947dbf4f..67ab05fd3517 100644 +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -5180,8 +5180,8 @@ static int svm_vm_init(struct kvm *kvm) + (type == KVM_X86_SEV_ES_VM || type == KVM_X86_SNP_VM); + to_kvm_sev_info(kvm)->need_init = true; + +- kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM); +- kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem; ++ kvm->arch.supports_gmem = (type == KVM_X86_SNP_VM); ++ kvm->arch.pre_fault_allowed = !kvm->arch.supports_gmem; + } + + if (!pause_filter_count || !pause_filter_thresh) +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index b58a74c1722d..401256ee817f 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -12778,8 +12778,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) + return -EINVAL; + + kvm->arch.vm_type = type; +- kvm->arch.has_private_mem = +- (type == KVM_X86_SW_PROTECTED_VM); ++ kvm->arch.supports_gmem = (type == KVM_X86_SW_PROTECTED_VM); + /* Decided by the vendor code for other VM types. 
*/ + kvm->arch.pre_fault_allowed = + type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM; +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0006-KVM-Rename-kvm_slot_can_be_private-to-kvm_slot_has_g.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0006-KVM-Rename-kvm_slot_can_be_private-to-kvm_slot_has_g.patch new file mode 100644 index 00000000000..c4575d9fdf6 --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0006-KVM-Rename-kvm_slot_can_be_private-to-kvm_slot_has_g.patch @@ -0,0 +1,97 @@ +From 2b5255551cda9733eeb2c0a52ce4b5e083701e63 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:17 +0100 +Subject: [PATCH 06/42] KVM: Rename kvm_slot_can_be_private() to + kvm_slot_has_gmem() + +The function kvm_slot_can_be_private() is used to check whether a memory +slot is backed by guest_memfd. Rename it to kvm_slot_has_gmem() to make +that clearer and to decouple memory being private from guest_memfd. + +Reviewed-by: Ira Weiny +Reviewed-by: Gavin Shan +Reviewed-by: Shivank Garg +Reviewed-by: Vlastimil Babka +Co-developed-by: David Hildenbrand +Signed-off-by: David Hildenbrand +Signed-off-by: Fuad Tabba +--- + arch/x86/kvm/mmu/mmu.c | 4 ++-- + arch/x86/kvm/svm/sev.c | 4 ++-- + include/linux/kvm_host.h | 2 +- + virt/kvm/guest_memfd.c | 2 +- + 4 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c +index 72ed7344cbf4..ada81a75d790 100644 +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -3285,7 +3285,7 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm, + int kvm_mmu_max_mapping_level(struct kvm *kvm, + const struct kvm_memory_slot *slot, gfn_t gfn) + { +- bool is_private = kvm_slot_can_be_private(slot) && ++ bool is_private = kvm_slot_has_gmem(slot) && + kvm_mem_is_private(kvm, gfn); + + return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private); +@@ -4498,7 +4498,7 @@ static int 
kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu, + { + int max_order, r; + +- if (!kvm_slot_can_be_private(fault->slot)) { ++ if (!kvm_slot_has_gmem(fault->slot)) { + kvm_mmu_prepare_memory_fault_exit(vcpu, fault); + return -EFAULT; + } +diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c +index 459c3b791fd4..ade7a5b36c68 100644 +--- a/arch/x86/kvm/svm/sev.c ++++ b/arch/x86/kvm/svm/sev.c +@@ -2319,7 +2319,7 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp) + mutex_lock(&kvm->slots_lock); + + memslot = gfn_to_memslot(kvm, params.gfn_start); +- if (!kvm_slot_can_be_private(memslot)) { ++ if (!kvm_slot_has_gmem(memslot)) { + ret = -EINVAL; + goto out; + } +@@ -4670,7 +4670,7 @@ void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) + } + + slot = gfn_to_memslot(kvm, gfn); +- if (!kvm_slot_can_be_private(slot)) { ++ if (!kvm_slot_has_gmem(slot)) { + pr_warn_ratelimited("SEV: Unexpected RMP fault, non-private slot for GPA 0x%llx\n", + gpa); + return; +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index a0e661aa3f8a..76b85099da99 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -614,7 +614,7 @@ struct kvm_memory_slot { + #endif + }; + +-static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot) ++static inline bool kvm_slot_has_gmem(const struct kvm_memory_slot *slot) + { + return slot && (slot->flags & KVM_MEM_GUEST_MEMFD); + } +diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c +index befea51bbc75..6db515833f61 100644 +--- a/virt/kvm/guest_memfd.c ++++ b/virt/kvm/guest_memfd.c +@@ -654,7 +654,7 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long + return -EINVAL; + + slot = gfn_to_memslot(kvm, start_gfn); +- if (!kvm_slot_can_be_private(slot)) ++ if (!kvm_slot_has_gmem(slot)) + return -EINVAL; + + file = kvm_gmem_get_file(slot); +-- +2.49.0 + diff --git 
a/resources/hiding_ci/linux_patches/05-mmap-support/0006-KVM-guest_memfd-Handle-in-place-shared-memory-as-gue.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0006-KVM-guest_memfd-Handle-in-place-shared-memory-as-gue.patch deleted file mode 100644 index 3e0dea5a7e6..00000000000 --- a/resources/hiding_ci/linux_patches/05-mmap-support/0006-KVM-guest_memfd-Handle-in-place-shared-memory-as-gue.patch +++ /dev/null @@ -1,40 +0,0 @@ -From b1e925d4d5db8513dba67c3a9d40a2b507668f09 Mon Sep 17 00:00:00 2001 -From: Fuad Tabba -Date: Tue, 18 Mar 2025 16:18:18 +0000 -Subject: [PATCH 06/26] KVM: guest_memfd: Handle in-place shared memory as - guest_memfd backed memory - -For VMs that allow sharing guest_memfd backed memory in-place, -handle that memory the same as "private" guest_memfd memory. This -means that faulting that memory in the host or in the guest will -go through the guest_memfd subsystem. - -Note that the word "private" in the name of the function -kvm_mem_is_private() doesn't necessarily indicate that the memory -isn't shared, but is due to the history and evolution of -guest_memfd and the various names it has received. In effect, -this function is used to multiplex between the path of a normal -page fault and the path of a guest_memfd backed page fault. 
- -Signed-off-by: Fuad Tabba ---- - include/linux/kvm_host.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h -index 601bbcaa5e41..3d5595a71a2a 100644 ---- a/include/linux/kvm_host.h -+++ b/include/linux/kvm_host.h -@@ -2521,7 +2521,8 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) - #else - static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) - { -- return false; -+ return kvm_arch_gmem_supports_shared_mem(kvm) && -+ kvm_slot_can_be_private(gfn_to_memslot(kvm, gfn)); - } - #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ - --- -2.47.1 - diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0007-KVM-Fix-comments-that-refer-to-slots_lock.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0007-KVM-Fix-comments-that-refer-to-slots_lock.patch new file mode 100644 index 00000000000..12d5e48f102 --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0007-KVM-Fix-comments-that-refer-to-slots_lock.patch @@ -0,0 +1,48 @@ +From 69fff3f1af3aa61fb0d855904400f52d748b8c42 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:18 +0100 +Subject: [PATCH 07/42] KVM: Fix comments that refer to slots_lock + +Fix comments so that they refer to slots_lock instead of slots_locks +(remove trailing s). 
+ +Reviewed-by: David Hildenbrand +Reviewed-by: Ira Weiny +Reviewed-by: Gavin Shan +Reviewed-by: Shivank Garg +Reviewed-by: Vlastimil Babka +Signed-off-by: Fuad Tabba +--- + include/linux/kvm_host.h | 2 +- + virt/kvm/kvm_main.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 76b85099da99..aec8e4182a65 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -859,7 +859,7 @@ struct kvm { + struct notifier_block pm_notifier; + #endif + #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +- /* Protected by slots_locks (for writes) and RCU (for reads) */ ++ /* Protected by slots_lock (for writes) and RCU (for reads) */ + struct xarray mem_attr_array; + #endif + char stats_id[KVM_STATS_NAME_SIZE]; +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 6efbea208fa6..d41bcc6a78b0 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -331,7 +331,7 @@ void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, + * All current use cases for flushing the TLBs for a specific memslot + * are related to dirty logging, and many do the TLB flush out of + * mmu_lock. The interaction between the various operations on memslot +- * must be serialized by slots_locks to ensure the TLB flush from one ++ * must be serialized by slots_lock to ensure the TLB flush from one + * operation is observed by any other operation on the same memslot. 
+ */ + lockdep_assert_held(&kvm->slots_lock); +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0007-KVM-arm64-Handle-guest_memfd-backed-guest-page-fault.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0007-KVM-arm64-Handle-guest_memfd-backed-guest-page-fault.patch deleted file mode 100644 index 5b68d6e183e..00000000000 --- a/resources/hiding_ci/linux_patches/05-mmap-support/0007-KVM-arm64-Handle-guest_memfd-backed-guest-page-fault.patch +++ /dev/null @@ -1,174 +0,0 @@ -From 996513a423377349767d5cfef87850e80131854f Mon Sep 17 00:00:00 2001 -From: Fuad Tabba -Date: Tue, 18 Mar 2025 16:18:21 +0000 -Subject: [PATCH 07/26] KVM: arm64: Handle guest_memfd()-backed guest page - faults - -Add arm64 support for handling guest page faults on guest_memfd -backed memslots. - -For now, the fault granule is restricted to PAGE_SIZE. - -Signed-off-by: Fuad Tabba ---- - arch/arm64/kvm/mmu.c | 65 +++++++++++++++++++++++++++------------- - include/linux/kvm_host.h | 5 ++++ - virt/kvm/kvm_main.c | 5 ---- - 3 files changed, 50 insertions(+), 25 deletions(-) - -diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c -index 887ffa1f5b14..adb0681fc1c6 100644 ---- a/arch/arm64/kvm/mmu.c -+++ b/arch/arm64/kvm/mmu.c -@@ -1454,6 +1454,30 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) - return vma->vm_flags & VM_MTE_ALLOWED; - } - -+static kvm_pfn_t faultin_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, -+ gfn_t gfn, bool write_fault, bool *writable, -+ struct page **page, bool is_private) -+{ -+ kvm_pfn_t pfn; -+ int ret; -+ -+ if (!is_private) -+ return __kvm_faultin_pfn(slot, gfn, write_fault ? 
FOLL_WRITE : 0, writable, page); -+ -+ *writable = false; -+ -+ ret = kvm_gmem_get_pfn(kvm, slot, gfn, &pfn, page, NULL); -+ if (!ret) { -+ *writable = !memslot_is_readonly(slot); -+ return pfn; -+ } -+ -+ if (ret == -EHWPOISON) -+ return KVM_PFN_ERR_HWPOISON; -+ -+ return KVM_PFN_ERR_NOSLOT_MASK; -+} -+ - static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - struct kvm_s2_trans *nested, - struct kvm_memory_slot *memslot, unsigned long hva, -@@ -1461,19 +1485,20 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - { - int ret = 0; - bool write_fault, writable; -- bool exec_fault, mte_allowed; -+ bool exec_fault, mte_allowed = false; - bool device = false, vfio_allow_any_uc = false; - unsigned long mmu_seq; - phys_addr_t ipa = fault_ipa; - struct kvm *kvm = vcpu->kvm; -- struct vm_area_struct *vma; -+ struct vm_area_struct *vma = NULL; - short vma_shift; - void *memcache; -- gfn_t gfn; -+ gfn_t gfn = ipa >> PAGE_SHIFT; - kvm_pfn_t pfn; - bool logging_active = memslot_is_logging(memslot); -- bool force_pte = logging_active || is_protected_kvm_enabled(); -- long vma_pagesize, fault_granule; -+ bool is_gmem = kvm_mem_is_private(kvm, gfn); -+ bool force_pte = logging_active || is_gmem || is_protected_kvm_enabled(); -+ long vma_pagesize, fault_granule = PAGE_SIZE; - enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; - struct kvm_pgtable *pgt; - struct page *page; -@@ -1510,16 +1535,22 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - return ret; - } - -+ mmap_read_lock(current->mm); -+ - /* - * Let's check if we will get back a huge page backed by hugetlbfs, or - * get block mapping for device MMIO region. 
- */ -- mmap_read_lock(current->mm); -- vma = vma_lookup(current->mm, hva); -- if (unlikely(!vma)) { -- kvm_err("Failed to find VMA for hva 0x%lx\n", hva); -- mmap_read_unlock(current->mm); -- return -EFAULT; -+ if (!is_gmem) { -+ vma = vma_lookup(current->mm, hva); -+ if (unlikely(!vma)) { -+ kvm_err("Failed to find VMA for hva 0x%lx\n", hva); -+ mmap_read_unlock(current->mm); -+ return -EFAULT; -+ } -+ -+ vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED; -+ mte_allowed = kvm_vma_mte_allowed(vma); - } - - if (force_pte) -@@ -1590,18 +1621,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - ipa &= ~(vma_pagesize - 1); - } - -- gfn = ipa >> PAGE_SHIFT; -- mte_allowed = kvm_vma_mte_allowed(vma); -- -- vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED; -- - /* Don't use the VMA after the unlock -- it may have vanished */ - vma = NULL; - - /* - * Read mmu_invalidate_seq so that KVM can detect if the results of -- * vma_lookup() or __kvm_faultin_pfn() become stale prior to -- * acquiring kvm->mmu_lock. -+ * vma_lookup() or faultin_pfn() become stale prior to acquiring -+ * kvm->mmu_lock. - * - * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs - * with the smp_wmb() in kvm_mmu_invalidate_end(). -@@ -1609,8 +1635,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - mmu_seq = vcpu->kvm->mmu_invalidate_seq; - mmap_read_unlock(current->mm); - -- pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? 
FOLL_WRITE : 0, -- &writable, &page); -+ pfn = faultin_pfn(kvm, memslot, gfn, write_fault, &writable, &page, is_gmem); - if (pfn == KVM_PFN_ERR_HWPOISON) { - kvm_send_hwpoison_signal(hva, vma_shift); - return 0; -diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h -index 3d5595a71a2a..ec3bedc18eab 100644 ---- a/include/linux/kvm_host.h -+++ b/include/linux/kvm_host.h -@@ -1882,6 +1882,11 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn) - return gfn_to_memslot(kvm, gfn)->id; - } - -+static inline bool memslot_is_readonly(const struct kvm_memory_slot *slot) -+{ -+ return slot->flags & KVM_MEM_READONLY; -+} -+ - static inline gfn_t - hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot) - { -diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index 38f0f402ea46..3e40acb9f5c0 100644 ---- a/virt/kvm/kvm_main.c -+++ b/virt/kvm/kvm_main.c -@@ -2624,11 +2624,6 @@ unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) - return size; - } - --static bool memslot_is_readonly(const struct kvm_memory_slot *slot) --{ -- return slot->flags & KVM_MEM_READONLY; --} -- - static unsigned long __gfn_to_hva_many(const struct kvm_memory_slot *slot, gfn_t gfn, - gfn_t *nr_pages, bool write) - { --- -2.47.1 - diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0008-KVM-Fix-comment-that-refers-to-kvm-uapi-header-path.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0008-KVM-Fix-comment-that-refers-to-kvm-uapi-header-path.patch new file mode 100644 index 00000000000..640d04e8546 --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0008-KVM-Fix-comment-that-refers-to-kvm-uapi-header-path.patch @@ -0,0 +1,35 @@ +From b0d555e0113bd1044ad484411c281d411d604af8 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:19 +0100 +Subject: [PATCH 08/42] KVM: Fix comment that refers to kvm uapi header path + +The comment that points to the path where the user-visible memslot flags +are refers to 
an outdated path and has a typo. + +Update the comment to refer to the correct path. + +Reviewed-by: David Hildenbrand +Reviewed-by: Gavin Shan +Reviewed-by: Shivank Garg +Reviewed-by: Vlastimil Babka +Signed-off-by: Fuad Tabba +--- + include/linux/kvm_host.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index aec8e4182a65..9a6712151a74 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -52,7 +52,7 @@ + /* + * The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally + * used in kvm, other bits are visible for userspace which are defined in +- * include/linux/kvm_h. ++ * include/uapi/linux/kvm.h. + */ + #define KVM_MEMSLOT_INVALID (1UL << 16) + +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0008-KVM-guest_memfd-selftests-guest_memfd-mmap-test-when.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0008-KVM-guest_memfd-selftests-guest_memfd-mmap-test-when.patch deleted file mode 100644 index 2a5a355a2e1..00000000000 --- a/resources/hiding_ci/linux_patches/05-mmap-support/0008-KVM-guest_memfd-selftests-guest_memfd-mmap-test-when.patch +++ /dev/null @@ -1,149 +0,0 @@ -From 1ee5d01987bff47f007fb86ad7738b299816b2ef Mon Sep 17 00:00:00 2001 -From: Fuad Tabba -Date: Tue, 18 Mar 2025 16:18:23 +0000 -Subject: [PATCH 08/26] KVM: guest_memfd: selftests: guest_memfd mmap() test - when mapping is allowed - -Expand the guest_memfd selftests to include testing mapping guest -memory for VM types that support it. - -Also, build the guest_memfd selftest for arm64. 
- -Signed-off-by: Fuad Tabba ---- - tools/testing/selftests/kvm/Makefile.kvm | 1 + - .../testing/selftests/kvm/guest_memfd_test.c | 75 +++++++++++++++++-- - 2 files changed, 70 insertions(+), 6 deletions(-) - -diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm -index 4277b983cace..c9a3f30e28dd 100644 ---- a/tools/testing/selftests/kvm/Makefile.kvm -+++ b/tools/testing/selftests/kvm/Makefile.kvm -@@ -160,6 +160,7 @@ TEST_GEN_PROGS_arm64 += coalesced_io_test - TEST_GEN_PROGS_arm64 += demand_paging_test - TEST_GEN_PROGS_arm64 += dirty_log_test - TEST_GEN_PROGS_arm64 += dirty_log_perf_test -+TEST_GEN_PROGS_arm64 += guest_memfd_test - TEST_GEN_PROGS_arm64 += guest_print_test - TEST_GEN_PROGS_arm64 += get-reg-list - TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus -diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c -index ce687f8d248f..38c501e49e0e 100644 ---- a/tools/testing/selftests/kvm/guest_memfd_test.c -+++ b/tools/testing/selftests/kvm/guest_memfd_test.c -@@ -34,12 +34,48 @@ static void test_file_read_write(int fd) - "pwrite on a guest_mem fd should fail"); - } - --static void test_mmap(int fd, size_t page_size) -+static void test_mmap_allowed(int fd, size_t total_size) - { -+ size_t page_size = getpagesize(); -+ const char val = 0xaa; -+ char *mem; -+ int ret; -+ int i; -+ -+ mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); -+ TEST_ASSERT(mem != MAP_FAILED, "mmaping() guest memory should pass."); -+ -+ memset(mem, val, total_size); -+ for (i = 0; i < total_size; i++) -+ TEST_ASSERT_EQ(mem[i], val); -+ -+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, -+ page_size); -+ TEST_ASSERT(!ret, "fallocate the first page should succeed"); -+ -+ for (i = 0; i < page_size; i++) -+ TEST_ASSERT_EQ(mem[i], 0x00); -+ for (; i < total_size; i++) -+ TEST_ASSERT_EQ(mem[i], val); -+ -+ memset(mem, val, total_size); -+ for (i = 0; i < 
total_size; i++) -+ TEST_ASSERT_EQ(mem[i], val); -+ -+ ret = munmap(mem, total_size); -+ TEST_ASSERT(!ret, "munmap should succeed"); -+} -+ -+static void test_mmap_denied(int fd, size_t total_size) -+{ -+ size_t page_size = getpagesize(); - char *mem; - - mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT_EQ(mem, MAP_FAILED); -+ -+ mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); -+ TEST_ASSERT_EQ(mem, MAP_FAILED); - } - - static void test_file_size(int fd, size_t page_size, size_t total_size) -@@ -170,19 +206,27 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) - close(fd1); - } - --int main(int argc, char *argv[]) -+unsigned long get_shared_type(void) - { -- size_t page_size; -+#ifdef __x86_64__ -+ return KVM_X86_SW_PROTECTED_VM; -+#endif -+ return 0; -+} -+ -+void test_vm_type(unsigned long type, bool is_shared) -+{ -+ struct kvm_vm *vm; - size_t total_size; -+ size_t page_size; - int fd; -- struct kvm_vm *vm; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); - - page_size = getpagesize(); - total_size = page_size * 4; - -- vm = vm_create_barebones(); -+ vm = vm_create_barebones_type(type); - - test_create_guest_memfd_invalid(vm); - test_create_guest_memfd_multiple(vm); -@@ -190,10 +234,29 @@ int main(int argc, char *argv[]) - fd = vm_create_guest_memfd(vm, total_size, 0); - - test_file_read_write(fd); -- test_mmap(fd, page_size); -+ -+ if (is_shared) -+ test_mmap_allowed(fd, total_size); -+ else -+ test_mmap_denied(fd, total_size); -+ - test_file_size(fd, page_size, total_size); - test_fallocate(fd, page_size, total_size); - test_invalid_punch_hole(fd, page_size, total_size); - - close(fd); -+ kvm_vm_release(vm); -+} -+ -+int main(int argc, char *argv[]) -+{ -+#ifndef __aarch64__ -+ /* For now, arm64 only supports shared guest memory. 
*/ -+ test_vm_type(VM_TYPE_DEFAULT, false); -+#endif -+ -+ if (kvm_has_cap(KVM_CAP_GMEM_SHARED_MEM)) -+ test_vm_type(get_shared_type(), true); -+ -+ return 0; - } --- -2.47.1 - diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0009-KVM-arm64-Enable-mapping-guest_memfd-in-arm64.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0009-KVM-arm64-Enable-mapping-guest_memfd-in-arm64.patch deleted file mode 100644 index a03d592e4b0..00000000000 --- a/resources/hiding_ci/linux_patches/05-mmap-support/0009-KVM-arm64-Enable-mapping-guest_memfd-in-arm64.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 3cc51efc17a2c41a480eed36b31c1773936717e0 Mon Sep 17 00:00:00 2001 -From: Fuad Tabba -Date: Tue, 18 Mar 2025 16:18:22 +0000 -Subject: [PATCH 09/26] KVM: arm64: Enable mapping guest_memfd in arm64 - -Enable mapping guest_memfd in arm64. For now, it applies to all -VMs in arm64 that use guest_memfd. In the future, new VM types -can restrict this via kvm_arch_gmem_supports_shared_mem(). - -Signed-off-by: Fuad Tabba ---- - arch/arm64/include/asm/kvm_host.h | 12 ++++++++++++ - arch/arm64/kvm/Kconfig | 1 + - 2 files changed, 13 insertions(+) - -diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h -index d919557af5e5..4440b2334a05 100644 ---- a/arch/arm64/include/asm/kvm_host.h -+++ b/arch/arm64/include/asm/kvm_host.h -@@ -1543,4 +1543,16 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); - #define kvm_has_s1poe(k) \ - (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) - -+#ifdef CONFIG_KVM_PRIVATE_MEM -+static inline bool kvm_arch_has_private_mem(struct kvm *kvm) -+{ -+ return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM); -+} -+ -+static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm) -+{ -+ return IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM); -+} -+#endif /* CONFIG_KVM_PRIVATE_MEM */ -+ - #endif /* __ARM64_KVM_HOST_H__ */ -diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig -index ead632ad01b4..4830d8805bed 100644 ---- 
a/arch/arm64/kvm/Kconfig -+++ b/arch/arm64/kvm/Kconfig -@@ -38,6 +38,7 @@ menuconfig KVM - select HAVE_KVM_VCPU_RUN_PID_CHANGE - select SCHED_INFO - select GUEST_PERF_EVENTS if PERF_EVENTS -+ select KVM_GMEM_SHARED_MEM - help - Support hosting virtualized guest machines. - --- -2.47.1 - diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0009-KVM-guest_memfd-Allow-host-to-map-guest_memfd-pages.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0009-KVM-guest_memfd-Allow-host-to-map-guest_memfd-pages.patch new file mode 100644 index 00000000000..2d07ac711fe --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0009-KVM-guest_memfd-Allow-host-to-map-guest_memfd-pages.patch @@ -0,0 +1,167 @@ +From fdaf4ca0334c928429992096ce96dbdba7d78687 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:20 +0100 +Subject: [PATCH 09/42] KVM: guest_memfd: Allow host to map guest_memfd pages + +This patch enables support for shared memory in guest_memfd, including +mapping that memory from host userspace. + +This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option, +and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED +flag at creation time. + +Reviewed-by: Gavin Shan +Acked-by: David Hildenbrand +Co-developed-by: Ackerley Tng +Signed-off-by: Ackerley Tng +Signed-off-by: Fuad Tabba +--- + include/linux/kvm_host.h | 13 +++++++ + include/uapi/linux/kvm.h | 1 + + virt/kvm/Kconfig | 4 +++ + virt/kvm/guest_memfd.c | 73 ++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 91 insertions(+) + +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 9a6712151a74..6b63556ca150 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -729,6 +729,19 @@ static inline bool kvm_arch_supports_gmem(struct kvm *kvm) + } + #endif + ++/* ++ * Returns true if this VM supports shared mem in guest_memfd. 
++ * ++ * Arch code must define kvm_arch_supports_gmem_shared_mem if support for ++ * guest_memfd is enabled. ++ */ ++#if !defined(kvm_arch_supports_gmem_shared_mem) ++static inline bool kvm_arch_supports_gmem_shared_mem(struct kvm *kvm) ++{ ++ return false; ++} ++#endif ++ + #ifndef kvm_arch_has_readonly_mem + static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) + { +diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h +index 37891580d05d..954f0668130c 100644 +--- a/include/uapi/linux/kvm.h ++++ b/include/uapi/linux/kvm.h +@@ -1592,6 +1592,7 @@ struct kvm_memory_attributes { + #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) + + #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) ++#define GUEST_MEMFD_FLAG_SUPPORT_SHARED (1ULL << 0) + + struct kvm_create_guest_memfd { + __u64 size; +diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig +index 559c93ad90be..e90884f74404 100644 +--- a/virt/kvm/Kconfig ++++ b/virt/kvm/Kconfig +@@ -128,3 +128,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE + config HAVE_KVM_ARCH_GMEM_INVALIDATE + bool + depends on KVM_GMEM ++ ++config KVM_GMEM_SHARED_MEM ++ select KVM_GMEM ++ bool +diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c +index 6db515833f61..06616b6b493b 100644 +--- a/virt/kvm/guest_memfd.c ++++ b/virt/kvm/guest_memfd.c +@@ -312,7 +312,77 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) + return gfn - slot->base_gfn + slot->gmem.pgoff; + } + ++static bool kvm_gmem_supports_shared(struct inode *inode) ++{ ++ const u64 flags = (u64)inode->i_private; ++ ++ if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM)) ++ return false; ++ ++ return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED; ++} ++ ++static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf) ++{ ++ struct inode *inode = file_inode(vmf->vma->vm_file); ++ struct folio *folio; ++ vm_fault_t ret = VM_FAULT_LOCKED; ++ ++ if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) ++ return VM_FAULT_SIGBUS; 
++ ++ folio = kvm_gmem_get_folio(inode, vmf->pgoff); ++ if (IS_ERR(folio)) { ++ int err = PTR_ERR(folio); ++ ++ if (err == -EAGAIN) ++ return VM_FAULT_RETRY; ++ ++ return vmf_error(err); ++ } ++ ++ if (WARN_ON_ONCE(folio_test_large(folio))) { ++ ret = VM_FAULT_SIGBUS; ++ goto out_folio; ++ } ++ ++ if (!folio_test_uptodate(folio)) { ++ clear_highpage(folio_page(folio, 0)); ++ kvm_gmem_mark_prepared(folio); ++ } ++ ++ vmf->page = folio_file_page(folio, vmf->pgoff); ++ ++out_folio: ++ if (ret != VM_FAULT_LOCKED) { ++ folio_unlock(folio); ++ folio_put(folio); ++ } ++ ++ return ret; ++} ++ ++static const struct vm_operations_struct kvm_gmem_vm_ops = { ++ .fault = kvm_gmem_fault_shared, ++}; ++ ++static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ if (!kvm_gmem_supports_shared(file_inode(file))) ++ return -ENODEV; ++ ++ if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) != ++ (VM_SHARED | VM_MAYSHARE)) { ++ return -EINVAL; ++ } ++ ++ vma->vm_ops = &kvm_gmem_vm_ops; ++ ++ return 0; ++} ++ + static struct file_operations kvm_gmem_fops = { ++ .mmap = kvm_gmem_mmap, + .open = generic_file_open, + .release = kvm_gmem_release, + .fallocate = kvm_gmem_fallocate, +@@ -463,6 +533,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) + u64 flags = args->flags; + u64 valid_flags = 0; + ++ if (kvm_arch_supports_gmem_shared_mem(kvm)) ++ valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED; ++ + if (flags & ~valid_flags) + return -EINVAL; + +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0010-KVM-guest_memfd-Track-shared-memory-support-in-memsl.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0010-KVM-guest_memfd-Track-shared-memory-support-in-memsl.patch new file mode 100644 index 00000000000..20bb5b39272 --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0010-KVM-guest_memfd-Track-shared-memory-support-in-memsl.patch @@ -0,0 +1,67 @@ +From c7a6f2f513fd0cc93092a67c74223438496a5dc2 Mon 
Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:21 +0100 +Subject: [PATCH 10/42] KVM: guest_memfd: Track shared memory support in + memslot + +Add a new internal flag in the top half of memslot->flags to track when +a guest_memfd-backed slot supports shared memory, which is reserved for +internal use in KVM. + +This avoids repeatedly checking the underlying guest_memfd file for +shared memory support, which requires taking a reference on the file. + +Reviewed-by: Gavin Shan +Acked-by: David Hildenbrand +Suggested-by: David Hildenbrand +Signed-off-by: Fuad Tabba +--- + include/linux/kvm_host.h | 11 ++++++++++- + virt/kvm/guest_memfd.c | 2 ++ + 2 files changed, 12 insertions(+), 1 deletion(-) + +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 6b63556ca150..bba7d2c14177 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -54,7 +54,8 @@ + * used in kvm, other bits are visible for userspace which are defined in + * include/uapi/linux/kvm.h. 
+ */ +-#define KVM_MEMSLOT_INVALID (1UL << 16) ++#define KVM_MEMSLOT_INVALID (1UL << 16) ++#define KVM_MEMSLOT_SUPPORTS_GMEM_SHARED (1UL << 17) + + /* + * Bit 63 of the memslot generation number is an "update in-progress flag", +@@ -2525,6 +2526,14 @@ static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, + vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE; + } + ++static inline bool kvm_gmem_memslot_supports_shared(const struct kvm_memory_slot *slot) ++{ ++ if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM)) ++ return false; ++ ++ return slot->flags & KVM_MEMSLOT_SUPPORTS_GMEM_SHARED; ++} ++ + #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES + static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) + { +diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c +index 06616b6b493b..73b0aa2bc45f 100644 +--- a/virt/kvm/guest_memfd.c ++++ b/virt/kvm/guest_memfd.c +@@ -592,6 +592,8 @@ int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, + */ + WRITE_ONCE(slot->gmem.file, file); + slot->gmem.pgoff = start; ++ if (kvm_gmem_supports_shared(inode)) ++ slot->flags |= KVM_MEMSLOT_SUPPORTS_GMEM_SHARED; + + xa_store_range(&gmem->bindings, start, end - 1, slot, GFP_KERNEL); + filemap_invalidate_unlock(inode->i_mapping); +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0011-KVM-x86-mmu-Handle-guest-page-faults-for-guest_memfd.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0011-KVM-x86-mmu-Handle-guest-page-faults-for-guest_memfd.patch new file mode 100644 index 00000000000..02621a9c119 --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0011-KVM-x86-mmu-Handle-guest-page-faults-for-guest_memfd.patch @@ -0,0 +1,163 @@ +From aa57cd34d53f8cc0e6af44b4070c39945ac556b4 Mon Sep 17 00:00:00 2001 +From: Ackerley Tng +Date: Wed, 11 Jun 2025 14:33:22 +0100 +Subject: [PATCH 11/42] KVM: x86/mmu: Handle guest page faults for guest_memfd + with shared memory + +For memslots backed 
by guest_memfd with shared mem support, the KVM MMU +must always fault in pages from guest_memfd, and not from the host +userspace_addr. Update the fault handler to do so. + +This patch also refactors related function names for accuracy: + +kvm_mem_is_private() returns true only when the current private/shared +state (in the CoCo sense) of the memory is private, and returns false if +the current state is shared explicitly or implicitly, e.g., belongs to a +non-CoCo VM. + +kvm_mmu_faultin_pfn_gmem() is updated to indicate that it can be used to +fault in not just private memory, but more generally, from guest_memfd. + +Co-developed-by: David Hildenbrand +Signed-off-by: David Hildenbrand +Signed-off-by: Ackerley Tng +Co-developed-by: Fuad Tabba +Signed-off-by: Fuad Tabba +--- + arch/x86/kvm/mmu/mmu.c | 38 +++++++++++++++++++++++--------------- + include/linux/kvm_host.h | 25 +++++++++++++++++++++++-- + 2 files changed, 46 insertions(+), 17 deletions(-) + +diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c +index ada81a75d790..0932c1af47b6 100644 +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -3291,6 +3291,11 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm, + return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private); + } + ++static inline bool fault_from_gmem(struct kvm_page_fault *fault) ++{ ++ return fault->is_private || kvm_gmem_memslot_supports_shared(fault->slot); ++} ++ + void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) + { + struct kvm_memory_slot *slot = fault->slot; +@@ -4467,21 +4472,25 @@ static inline u8 kvm_max_level_for_order(int order) + return PG_LEVEL_4K; + } + +-static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, +- u8 max_level, int gmem_order) ++static u8 kvm_max_level_for_fault_and_order(struct kvm *kvm, ++ struct kvm_page_fault *fault, ++ int order) + { +- u8 req_max_level; ++ u8 max_level = fault->max_level; + + if (max_level == PG_LEVEL_4K) +
return PG_LEVEL_4K; + +- max_level = min(kvm_max_level_for_order(gmem_order), max_level); ++ max_level = min(kvm_max_level_for_order(order), max_level); + if (max_level == PG_LEVEL_4K) + return PG_LEVEL_4K; + +- req_max_level = kvm_x86_call(private_max_mapping_level)(kvm, pfn); +- if (req_max_level) +- max_level = min(max_level, req_max_level); ++ if (fault->is_private) { ++ u8 level = kvm_x86_call(private_max_mapping_level)(kvm, fault->pfn); ++ ++ if (level) ++ max_level = min(max_level, level); ++ } + + return max_level; + } +@@ -4493,10 +4502,10 @@ static void kvm_mmu_finish_page_fault(struct kvm_vcpu *vcpu, + r == RET_PF_RETRY, fault->map_writable); + } + +-static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu, +- struct kvm_page_fault *fault) ++static int kvm_mmu_faultin_pfn_gmem(struct kvm_vcpu *vcpu, ++ struct kvm_page_fault *fault) + { +- int max_order, r; ++ int gmem_order, r; + + if (!kvm_slot_has_gmem(fault->slot)) { + kvm_mmu_prepare_memory_fault_exit(vcpu, fault); +@@ -4504,15 +4513,14 @@ static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu, + } + + r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn, +- &fault->refcounted_page, &max_order); ++ &fault->refcounted_page, &gmem_order); + if (r) { + kvm_mmu_prepare_memory_fault_exit(vcpu, fault); + return r; + } + + fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY); +- fault->max_level = kvm_max_private_mapping_level(vcpu->kvm, fault->pfn, +- fault->max_level, max_order); ++ fault->max_level = kvm_max_level_for_fault_and_order(vcpu->kvm, fault, gmem_order); + + return RET_PF_CONTINUE; + } +@@ -4522,8 +4530,8 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, + { + unsigned int foll = fault->write ? 
FOLL_WRITE : 0; + +- if (fault->is_private) +- return kvm_mmu_faultin_pfn_private(vcpu, fault); ++ if (fault_from_gmem(fault)) ++ return kvm_mmu_faultin_pfn_gmem(vcpu, fault); + + foll |= FOLL_NOWAIT; + fault->pfn = __kvm_faultin_pfn(fault->slot, fault->gfn, foll, +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index bba7d2c14177..8f7069385189 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -2547,10 +2547,31 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, + bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range); + ++/* ++ * Returns true if the given gfn's private/shared status (in the CoCo sense) is ++ * private. ++ * ++ * A return value of false indicates that the gfn is explicitly or implicitly ++ * shared (i.e., non-CoCo VMs). ++ */ + static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) + { +- return IS_ENABLED(CONFIG_KVM_GMEM) && +- kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; ++ struct kvm_memory_slot *slot; ++ ++ if (!IS_ENABLED(CONFIG_KVM_GMEM)) ++ return false; ++ ++ slot = gfn_to_memslot(kvm, gfn); ++ if (kvm_slot_has_gmem(slot) && kvm_gmem_memslot_supports_shared(slot)) { ++ /* ++ * Without in-place conversion support, if a guest_memfd memslot ++ * supports shared memory, then all the slot's memory is ++ * considered not private, i.e., implicitly shared. 
++ */ ++ return false; ++ } ++ ++ return kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; + } + #else + static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0012-KVM-x86-Consult-guest_memfd-when-computing-max_mappi.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0012-KVM-x86-Consult-guest_memfd-when-computing-max_mappi.patch new file mode 100644 index 00000000000..685ca3d98a2 --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0012-KVM-x86-Consult-guest_memfd-when-computing-max_mappi.patch @@ -0,0 +1,245 @@ +From 0d6ad36493831412e14582768d66363a60d52405 Mon Sep 17 00:00:00 2001 +From: Ackerley Tng +Date: Wed, 11 Jun 2025 14:33:23 +0100 +Subject: [PATCH 12/42] KVM: x86: Consult guest_memfd when computing + max_mapping_level + +This patch adds kvm_gmem_max_mapping_level(), which always returns +PG_LEVEL_4K since guest_memfd only supports 4K pages for now. + +When guest_memfd supports shared memory, max_mapping_level (especially +when recovering huge pages - see call to __kvm_mmu_max_mapping_level() +from recover_huge_pages_range()) should take input from +guest_memfd. + +Input from guest_memfd should be taken in these cases: + ++ if the memslot supports shared memory (guest_memfd is used for + shared memory, or in future both shared and private memory) or ++ if the memslot is only used for private memory and that gfn is + private. + +If the memslot doesn't use guest_memfd, figure out the +max_mapping_level using the host page tables like before. + +This patch also refactors and inlines the other call to +__kvm_mmu_max_mapping_level(). + +In kvm_mmu_hugepage_adjust(), guest_memfd's input is already +provided (if applicable) in fault->max_level. Hence, there is no need +to query guest_memfd. 
+ +lpage_info is queried like before, and then if the fault is not from +guest_memfd, adjust fault->req_level based on input from host page +tables. + +Acked-by: David Hildenbrand +Signed-off-by: Ackerley Tng +Co-developed-by: Fuad Tabba +Signed-off-by: Fuad Tabba +--- + arch/x86/kvm/mmu/mmu.c | 87 +++++++++++++++++++++++++--------------- + include/linux/kvm_host.h | 11 +++++ + virt/kvm/guest_memfd.c | 12 ++++++ + 3 files changed, 78 insertions(+), 32 deletions(-) + +diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c +index 0932c1af47b6..b071b9afb8ad 100644 +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -3258,12 +3258,11 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, + return level; + } + +-static int __kvm_mmu_max_mapping_level(struct kvm *kvm, +- const struct kvm_memory_slot *slot, +- gfn_t gfn, int max_level, bool is_private) ++static int kvm_lpage_info_max_mapping_level(struct kvm *kvm, ++ const struct kvm_memory_slot *slot, ++ gfn_t gfn, int max_level) + { + struct kvm_lpage_info *linfo; +- int host_level; + + max_level = min(max_level, max_huge_page_level); + for ( ; max_level > PG_LEVEL_4K; max_level--) { +@@ -3272,28 +3271,61 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm, + break; + } + +- if (is_private) +- return max_level; ++ return max_level; ++} ++ ++static inline u8 kvm_max_level_for_order(int order) ++{ ++ BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); ++ ++ KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && ++ order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && ++ order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); ++ ++ if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) ++ return PG_LEVEL_1G; ++ ++ if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) ++ return PG_LEVEL_2M; ++ ++ return PG_LEVEL_4K; ++} ++ ++static inline int kvm_gmem_max_mapping_level(const struct kvm_memory_slot *slot, ++ gfn_t gfn, int max_level) ++{ ++ int max_order; + + if (max_level == PG_LEVEL_4K) + return PG_LEVEL_4K; + +- 
host_level = host_pfn_mapping_level(kvm, gfn, slot); +- return min(host_level, max_level); ++ max_order = kvm_gmem_mapping_order(slot, gfn); ++ return min(max_level, kvm_max_level_for_order(max_order)); + } + + int kvm_mmu_max_mapping_level(struct kvm *kvm, + const struct kvm_memory_slot *slot, gfn_t gfn) + { +- bool is_private = kvm_slot_has_gmem(slot) && +- kvm_mem_is_private(kvm, gfn); ++ int max_level; + +- return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private); ++ max_level = kvm_lpage_info_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM); ++ if (max_level == PG_LEVEL_4K) ++ return PG_LEVEL_4K; ++ ++ if (kvm_slot_has_gmem(slot) && ++ (kvm_gmem_memslot_supports_shared(slot) || ++ kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) { ++ return kvm_gmem_max_mapping_level(slot, gfn, max_level); ++ } ++ ++ return min(max_level, host_pfn_mapping_level(kvm, gfn, slot)); + } + + static inline bool fault_from_gmem(struct kvm_page_fault *fault) + { +- return fault->is_private || kvm_gmem_memslot_supports_shared(fault->slot); ++ return fault->is_private || ++ (kvm_slot_has_gmem(fault->slot) && ++ kvm_gmem_memslot_supports_shared(fault->slot)); + } + + void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) +@@ -3316,12 +3348,20 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault + * Enforce the iTLB multihit workaround after capturing the requested + * level, which will be used to do precise, accurate accounting. 
+ */ +- fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot, +- fault->gfn, fault->max_level, +- fault->is_private); ++ fault->req_level = kvm_lpage_info_max_mapping_level(vcpu->kvm, slot, ++ fault->gfn, fault->max_level); + if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed) + return; + ++ if (!fault_from_gmem(fault)) { ++ int host_level; ++ ++ host_level = host_pfn_mapping_level(vcpu->kvm, fault->gfn, slot); ++ fault->req_level = min(fault->req_level, host_level); ++ if (fault->req_level == PG_LEVEL_4K) ++ return; ++ } ++ + /* + * mmu_invalidate_retry() was successful and mmu_lock is held, so + * the pmd can't be split from under us. +@@ -4455,23 +4495,6 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) + vcpu->stat.pf_fixed++; + } + +-static inline u8 kvm_max_level_for_order(int order) +-{ +- BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); +- +- KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && +- order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && +- order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); +- +- if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) +- return PG_LEVEL_1G; +- +- if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) +- return PG_LEVEL_2M; +- +- return PG_LEVEL_4K; +-} +- + static u8 kvm_max_level_for_fault_and_order(struct kvm *kvm, + struct kvm_page_fault *fault, + int order) +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 8f7069385189..58d7761c2a90 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -2574,6 +2574,10 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) + return kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; + } + #else ++static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) ++{ ++ return 0; ++} + static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) + { + return false; +@@ -2584,6 +2588,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, 
gfn_t gfn) + int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn, kvm_pfn_t *pfn, struct page **page, + int *max_order); ++int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn); + #else + static inline int kvm_gmem_get_pfn(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn, +@@ -2593,6 +2598,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, + KVM_BUG_ON(1, kvm); + return -EIO; + } ++static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, ++ gfn_t gfn) ++{ ++ BUILD_BUG(); ++ return 0; ++} + #endif /* CONFIG_KVM_GMEM */ + + #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE +diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c +index 73b0aa2bc45f..ebdb2d8bf57a 100644 +--- a/virt/kvm/guest_memfd.c ++++ b/virt/kvm/guest_memfd.c +@@ -713,6 +713,18 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, + } + EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); + ++/* ++ * Returns the mapping order for this @gfn in @slot. ++ * ++ * This is equal to max_order that would be returned if kvm_gmem_get_pfn() were ++ * called now. 
++ */ ++int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn) ++{ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_gmem_mapping_order); ++ + #ifdef CONFIG_KVM_GENERIC_GMEM_POPULATE + long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, + kvm_gmem_populate_cb post_populate, void *opaque) +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0013-KVM-x86-Enable-guest_memfd-shared-memory-for-non-CoC.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0013-KVM-x86-Enable-guest_memfd-shared-memory-for-non-CoC.patch new file mode 100644 index 00000000000..8049de981d0 --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0013-KVM-x86-Enable-guest_memfd-shared-memory-for-non-CoC.patch @@ -0,0 +1,74 @@ +From 661b138e0604819c4b93b2bda4d5e3f78011d4eb Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:24 +0100 +Subject: [PATCH 13/42] KVM: x86: Enable guest_memfd shared memory for non-CoCo + VMs + +Define the architecture-specific macro to enable shared memory support +in guest_memfd for ordinary, i.e., non-CoCo, VM types, specifically +KVM_X86_DEFAULT_VM and KVM_X86_SW_PROTECTED_VM. + +Enable the KVM_GMEM_SHARED_MEM Kconfig option if KVM_SW_PROTECTED_VM is +enabled. 
+ +Co-developed-by: Ackerley Tng +Signed-off-by: Ackerley Tng +Signed-off-by: Fuad Tabba +--- + arch/x86/include/asm/kvm_host.h | 10 ++++++++++ + arch/x86/kvm/Kconfig | 1 + + arch/x86/kvm/x86.c | 3 ++- + 3 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 4bc50c1e21bd..7b9ccdd99f32 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -2271,8 +2271,18 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, + + #ifdef CONFIG_KVM_GMEM + #define kvm_arch_supports_gmem(kvm) ((kvm)->arch.supports_gmem) ++ ++/* ++ * CoCo VMs with hardware support that use guest_memfd only for backing private ++ * memory, e.g., TDX, cannot use guest_memfd with userspace mapping enabled. ++ */ ++#define kvm_arch_supports_gmem_shared_mem(kvm) \ ++ (IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM) && \ ++ ((kvm)->arch.vm_type == KVM_X86_SW_PROTECTED_VM || \ ++ (kvm)->arch.vm_type == KVM_X86_DEFAULT_VM)) + #else + #define kvm_arch_supports_gmem(kvm) false ++#define kvm_arch_supports_gmem_shared_mem(kvm) false + #endif + + #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) +diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig +index 9151cd82adab..29845a286430 100644 +--- a/arch/x86/kvm/Kconfig ++++ b/arch/x86/kvm/Kconfig +@@ -47,6 +47,7 @@ config KVM_X86 + select KVM_GENERIC_HARDWARE_ENABLING + select KVM_GENERIC_PRE_FAULT_MEMORY + select KVM_GENERIC_GMEM_POPULATE if KVM_SW_PROTECTED_VM ++ select KVM_GMEM_SHARED_MEM if KVM_SW_PROTECTED_VM + select KVM_WERROR if WERROR + + config KVM +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 401256ee817f..e21f5f2fe059 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -12778,7 +12778,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) + return -EINVAL; + + kvm->arch.vm_type = type; +- kvm->arch.supports_gmem = (type == KVM_X86_SW_PROTECTED_VM); ++ kvm->arch.supports_gmem 
= ++ type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM; + /* Decided by the vendor code for other VM types. */ + kvm->arch.pre_fault_allowed = + type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM; +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0014-KVM-arm64-Refactor-user_mem_abort.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0014-KVM-arm64-Refactor-user_mem_abort.patch new file mode 100644 index 00000000000..f5b96a0df5f --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0014-KVM-arm64-Refactor-user_mem_abort.patch @@ -0,0 +1,195 @@ +From d8bd2cf8aac924d4e2a992c72e71212dd4e93c8f Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:25 +0100 +Subject: [PATCH 14/42] KVM: arm64: Refactor user_mem_abort() + +To simplify the code and to make the assumptions clearer, +refactor user_mem_abort() by immediately setting force_pte to +true if the conditions are met. + +Remove the comment about logging_active being guaranteed to never be +true for VM_PFNMAP memslots, since it's not actually correct. + +Move code that will be reused in the following patch into separate +functions. + +Other small instances of tidying up. + +No functional change intended. 
+ +Reviewed-by: Gavin Shan +Signed-off-by: Fuad Tabba +--- + arch/arm64/kvm/mmu.c | 100 ++++++++++++++++++++++++------------------- + 1 file changed, 55 insertions(+), 45 deletions(-) + +diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c +index 2942ec92c5a4..58662e0ef13e 100644 +--- a/arch/arm64/kvm/mmu.c ++++ b/arch/arm64/kvm/mmu.c +@@ -1470,13 +1470,56 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) + return vma->vm_flags & VM_MTE_ALLOWED; + } + ++static int prepare_mmu_memcache(struct kvm_vcpu *vcpu, bool topup_memcache, ++ void **memcache) ++{ ++ int min_pages; ++ ++ if (!is_protected_kvm_enabled()) ++ *memcache = &vcpu->arch.mmu_page_cache; ++ else ++ *memcache = &vcpu->arch.pkvm_memcache; ++ ++ if (!topup_memcache) ++ return 0; ++ ++ min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); ++ ++ if (!is_protected_kvm_enabled()) ++ return kvm_mmu_topup_memory_cache(*memcache, min_pages); ++ ++ return topup_hyp_memcache(*memcache, min_pages); ++} ++ ++/* ++ * Potentially reduce shadow S2 permissions to match the guest's own S2. For ++ * exec faults, we'd only reach this point if the guest actually allowed it (see ++ * kvm_s2_handle_perm_fault). ++ * ++ * Also encode the level of the original translation in the SW bits of the leaf ++ * entry as a proxy for the span of that translation. This will be retrieved on ++ * TLB invalidation from the guest and used to limit the invalidation scope if a ++ * TTL hint or a range isn't provided. 
++ */ ++static void adjust_nested_fault_perms(struct kvm_s2_trans *nested, ++ enum kvm_pgtable_prot *prot, ++ bool *writable) ++{ ++ *writable &= kvm_s2_trans_writable(nested); ++ if (!kvm_s2_trans_readable(nested)) ++ *prot &= ~KVM_PGTABLE_PROT_R; ++ ++ *prot |= kvm_encode_nested_level(nested); ++} ++ + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + struct kvm_s2_trans *nested, + struct kvm_memory_slot *memslot, unsigned long hva, + bool fault_is_perm) + { + int ret = 0; +- bool write_fault, writable, force_pte = false; ++ bool topup_memcache; ++ bool write_fault, writable; + bool exec_fault, mte_allowed; + bool device = false, vfio_allow_any_uc = false; + unsigned long mmu_seq; +@@ -1488,6 +1531,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + gfn_t gfn; + kvm_pfn_t pfn; + bool logging_active = memslot_is_logging(memslot); ++ bool force_pte = logging_active; + long vma_pagesize, fault_granule; + enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; + struct kvm_pgtable *pgt; +@@ -1505,28 +1549,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + return -EFAULT; + } + +- if (!is_protected_kvm_enabled()) +- memcache = &vcpu->arch.mmu_page_cache; +- else +- memcache = &vcpu->arch.pkvm_memcache; +- + /* + * Permission faults just need to update the existing leaf entry, + * and so normally don't require allocations from the memcache. The + * only exception to this is when dirty logging is enabled at runtime + * and a write fault needs to collapse a block entry into a table. 
+ */ +- if (!fault_is_perm || (logging_active && write_fault)) { +- int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); +- +- if (!is_protected_kvm_enabled()) +- ret = kvm_mmu_topup_memory_cache(memcache, min_pages); +- else +- ret = topup_hyp_memcache(memcache, min_pages); +- +- if (ret) +- return ret; +- } ++ topup_memcache = !fault_is_perm || (logging_active && write_fault); ++ ret = prepare_mmu_memcache(vcpu, topup_memcache, &memcache); ++ if (ret) ++ return ret; + + /* + * Let's check if we will get back a huge page backed by hugetlbfs, or +@@ -1540,16 +1572,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + return -EFAULT; + } + +- /* +- * logging_active is guaranteed to never be true for VM_PFNMAP +- * memslots. +- */ +- if (logging_active) { +- force_pte = true; ++ if (force_pte) + vma_shift = PAGE_SHIFT; +- } else { ++ else + vma_shift = get_vma_page_shift(vma, hva); +- } + + switch (vma_shift) { + #ifndef __PAGETABLE_PMD_FOLDED +@@ -1601,7 +1627,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + max_map_size = PAGE_SIZE; + + force_pte = (max_map_size == PAGE_SIZE); +- vma_pagesize = min(vma_pagesize, (long)max_map_size); ++ vma_pagesize = min_t(long, vma_pagesize, max_map_size); + } + + /* +@@ -1630,7 +1656,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs + * with the smp_wmb() in kvm_mmu_invalidate_end(). + */ +- mmu_seq = vcpu->kvm->mmu_invalidate_seq; ++ mmu_seq = kvm->mmu_invalidate_seq; + mmap_read_unlock(current->mm); + + pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0, +@@ -1665,24 +1691,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + if (exec_fault && device) + return -ENOEXEC; + +- /* +- * Potentially reduce shadow S2 permissions to match the guest's own +- * S2. 
For exec faults, we'd only reach this point if the guest +- * actually allowed it (see kvm_s2_handle_perm_fault). +- * +- * Also encode the level of the original translation in the SW bits +- * of the leaf entry as a proxy for the span of that translation. +- * This will be retrieved on TLB invalidation from the guest and +- * used to limit the invalidation scope if a TTL hint or a range +- * isn't provided. +- */ +- if (nested) { +- writable &= kvm_s2_trans_writable(nested); +- if (!kvm_s2_trans_readable(nested)) +- prot &= ~KVM_PGTABLE_PROT_R; +- +- prot |= kvm_encode_nested_level(nested); +- } ++ if (nested) ++ adjust_nested_fault_perms(nested, &prot, &writable); + + kvm_fault_lock(kvm); + pgt = vcpu->arch.hw_mmu->pgt; +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0015-KVM-arm64-Handle-guest_memfd-backed-guest-page-fault.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0015-KVM-arm64-Handle-guest_memfd-backed-guest-page-fault.patch new file mode 100644 index 00000000000..7b4aa15486a --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0015-KVM-arm64-Handle-guest_memfd-backed-guest-page-fault.patch @@ -0,0 +1,125 @@ +From c04bce997f55f5463df73708c15ba0fae0d142b9 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:26 +0100 +Subject: [PATCH 15/42] KVM: arm64: Handle guest_memfd-backed guest page faults + +Add arm64 support for handling guest page faults on guest_memfd backed +memslots. Until guest_memfd supports huge pages, the fault granule is +restricted to PAGE_SIZE. 
+ +Reviewed-by: Gavin Shan +Signed-off-by: Fuad Tabba +--- + arch/arm64/kvm/mmu.c | 82 ++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 79 insertions(+), 3 deletions(-) + +diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c +index 58662e0ef13e..71f8b53683e7 100644 +--- a/arch/arm64/kvm/mmu.c ++++ b/arch/arm64/kvm/mmu.c +@@ -1512,6 +1512,78 @@ static void adjust_nested_fault_perms(struct kvm_s2_trans *nested, + *prot |= kvm_encode_nested_level(nested); + } + ++#define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED) ++ ++static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, ++ struct kvm_s2_trans *nested, ++ struct kvm_memory_slot *memslot, bool is_perm) ++{ ++ bool write_fault, exec_fault, writable; ++ enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS; ++ enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; ++ struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt; ++ struct page *page; ++ struct kvm *kvm = vcpu->kvm; ++ void *memcache; ++ kvm_pfn_t pfn; ++ gfn_t gfn; ++ int ret; ++ ++ ret = prepare_mmu_memcache(vcpu, true, &memcache); ++ if (ret) ++ return ret; ++ ++ if (nested) ++ gfn = kvm_s2_trans_output(nested) >> PAGE_SHIFT; ++ else ++ gfn = fault_ipa >> PAGE_SHIFT; ++ ++ write_fault = kvm_is_write_fault(vcpu); ++ exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); ++ ++ if (write_fault && exec_fault) { ++ kvm_err("Simultaneous write and execution fault\n"); ++ return -EFAULT; ++ } ++ ++ if (is_perm && !write_fault && !exec_fault) { ++ kvm_err("Unexpected L2 read permission error\n"); ++ return -EFAULT; ++ } ++ ++ ret = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, &page, NULL); ++ if (ret) { ++ kvm_prepare_memory_fault_exit(vcpu, fault_ipa, PAGE_SIZE, ++ write_fault, exec_fault, false); ++ return ret; ++ } ++ ++ writable = !(memslot->flags & KVM_MEM_READONLY); ++ ++ if (nested) ++ adjust_nested_fault_perms(nested, &prot, &writable); ++ ++ if (writable) ++ prot |= KVM_PGTABLE_PROT_W; ++ 
++ if (exec_fault || ++ (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) && ++ (!nested || kvm_s2_trans_executable(nested)))) ++ prot |= KVM_PGTABLE_PROT_X; ++ ++ kvm_fault_lock(kvm); ++ ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, PAGE_SIZE, ++ __pfn_to_phys(pfn), prot, ++ memcache, flags); ++ kvm_release_faultin_page(kvm, page, !!ret, writable); ++ kvm_fault_unlock(kvm); ++ ++ if (writable && !ret) ++ mark_page_dirty_in_slot(kvm, memslot, gfn); ++ ++ return ret != -EAGAIN ? ret : 0; ++} ++ + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + struct kvm_s2_trans *nested, + struct kvm_memory_slot *memslot, unsigned long hva, +@@ -1536,7 +1608,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; + struct kvm_pgtable *pgt; + struct page *page; +- enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; ++ enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS; + + if (fault_is_perm) + fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu); +@@ -1963,8 +2035,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) + goto out_unlock; + } + +- ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva, +- esr_fsc_is_permission_fault(esr)); ++ if (kvm_slot_has_gmem(memslot)) ++ ret = gmem_abort(vcpu, fault_ipa, nested, memslot, ++ esr_fsc_is_permission_fault(esr)); ++ else ++ ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva, ++ esr_fsc_is_permission_fault(esr)); + if (ret == 0) + ret = 1; + out: +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0016-KVM-arm64-Enable-host-mapping-of-shared-guest_memfd-.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0016-KVM-arm64-Enable-host-mapping-of-shared-guest_memfd-.patch new file mode 100644 index 00000000000..2a129dc7a11 --- /dev/null +++ 
b/resources/hiding_ci/linux_patches/05-mmap-support/0016-KVM-arm64-Enable-host-mapping-of-shared-guest_memfd-.patch @@ -0,0 +1,69 @@ +From 3833ce3bfc91b7446fbbf749410a8ad7cf6767f1 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:27 +0100 +Subject: [PATCH 16/42] KVM: arm64: Enable host mapping of shared guest_memfd + memory + +Enable the host mapping of guest_memfd-backed memory on arm64. + +This applies to all current arm64 VM types that support guest_memfd. +Future VM types can restrict this behavior via the +kvm_arch_supports_gmem_shared_mem() hook if needed. + +Reviewed-by: James Houghton +Reviewed-by: Gavin Shan +Acked-by: David Hildenbrand +Signed-off-by: Fuad Tabba +--- + arch/arm64/include/asm/kvm_host.h | 4 ++++ + arch/arm64/kvm/Kconfig | 1 + + arch/arm64/kvm/mmu.c | 7 +++++++ + 3 files changed, 12 insertions(+) + +diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h +index d27079968341..678e7b93bb01 100644 +--- a/arch/arm64/include/asm/kvm_host.h ++++ b/arch/arm64/include/asm/kvm_host.h +@@ -1675,5 +1675,9 @@ void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt); + void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1); + void check_feature_map(void); + ++#ifdef CONFIG_KVM_GMEM ++#define kvm_arch_supports_gmem(kvm) true ++#define kvm_arch_supports_gmem_shared_mem(kvm) IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM) ++#endif + + #endif /* __ARM64_KVM_HOST_H__ */ +diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig +index 713248f240e0..87120d46919a 100644 +--- a/arch/arm64/kvm/Kconfig ++++ b/arch/arm64/kvm/Kconfig +@@ -37,6 +37,7 @@ menuconfig KVM + select HAVE_KVM_VCPU_RUN_PID_CHANGE + select SCHED_INFO + select GUEST_PERF_EVENTS if PERF_EVENTS ++ select KVM_GMEM_SHARED_MEM + help + Support hosting virtualized guest machines. 
+ +diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c +index 71f8b53683e7..55ac03f277e0 100644 +--- a/arch/arm64/kvm/mmu.c ++++ b/arch/arm64/kvm/mmu.c +@@ -2274,6 +2274,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, + if ((new->base_gfn + new->npages) > (kvm_phys_size(&kvm->arch.mmu) >> PAGE_SHIFT)) + return -EFAULT; + ++ /* ++ * Only support guest_memfd backed memslots with shared memory, since ++ * there aren't any CoCo VMs that support only private memory on arm64. ++ */ ++ if (kvm_slot_has_gmem(new) && !kvm_gmem_memslot_supports_shared(new)) ++ return -EINVAL; ++ + hva = new->userspace_addr; + reg_end = hva + (new->npages << PAGE_SHIFT); + +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0017-KVM-Introduce-the-KVM-capability-KVM_CAP_GMEM_SHARED.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0017-KVM-Introduce-the-KVM-capability-KVM_CAP_GMEM_SHARED.patch new file mode 100644 index 00000000000..7d71be4e3f7 --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0017-KVM-Introduce-the-KVM-capability-KVM_CAP_GMEM_SHARED.patch @@ -0,0 +1,74 @@ +From 1b14ab18d7a76d54667c1dc399348c36423d7570 Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:28 +0100 +Subject: [PATCH 17/42] KVM: Introduce the KVM capability + KVM_CAP_GMEM_SHARED_MEM + +This patch introduces the KVM capability KVM_CAP_GMEM_SHARED_MEM, which +indicates that guest_memfd supports shared memory (when enabled by the +flag). This support is limited to certain VM types, determined per +architecture. + +This patch also updates the KVM documentation with details on the new +capability, flag, and other information about support for shared memory +in guest_memfd. 
+ +Reviewed-by: David Hildenbrand +Reviewed-by: Gavin Shan +Signed-off-by: Fuad Tabba +--- + Documentation/virt/kvm/api.rst | 9 +++++++++ + include/uapi/linux/kvm.h | 1 + + virt/kvm/kvm_main.c | 4 ++++ + 3 files changed, 14 insertions(+) + +diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst +index 9abf93ee5f65..0fe90d0a74b4 100644 +--- a/Documentation/virt/kvm/api.rst ++++ b/Documentation/virt/kvm/api.rst +@@ -6407,6 +6407,15 @@ most one mapping per page, i.e. binding multiple memory regions to a single + guest_memfd range is not allowed (any number of memory regions can be bound to + a single guest_memfd file, but the bound ranges must not overlap). + ++When the capability KVM_CAP_GMEM_SHARED_MEM is supported, the 'flags' field ++supports GUEST_MEMFD_FLAG_SUPPORT_SHARED. Setting this flag on guest_memfd ++creation enables mmap() and faulting of guest_memfd memory to host userspace. ++ ++When the KVM MMU performs a PFN lookup to service a guest fault and the backing ++guest_memfd has the GUEST_MEMFD_FLAG_SUPPORT_SHARED set, then the fault will ++always be consumed from guest_memfd, regardless of whether it is a shared or a ++private fault. ++ + See KVM_SET_USER_MEMORY_REGION2 for additional details. 
+ + 4.143 KVM_PRE_FAULT_MEMORY +diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h +index 954f0668130c..4fba730ec62b 100644 +--- a/include/uapi/linux/kvm.h ++++ b/include/uapi/linux/kvm.h +@@ -956,6 +956,7 @@ struct kvm_enable_cap { + #define KVM_CAP_ARM_EL2 240 + #define KVM_CAP_ARM_EL2_E2H0 241 + #define KVM_CAP_RISCV_MP_STATE_RESET 242 ++#define KVM_CAP_GMEM_SHARED_MEM 243 + + struct kvm_irq_routing_irqchip { + __u32 irqchip; +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index d41bcc6a78b0..441c9b53b876 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -4913,6 +4913,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) + #ifdef CONFIG_KVM_GMEM + case KVM_CAP_GUEST_MEMFD: + return !kvm || kvm_arch_supports_gmem(kvm); ++#endif ++#ifdef CONFIG_KVM_GMEM_SHARED_MEM ++ case KVM_CAP_GMEM_SHARED_MEM: ++ return !kvm || kvm_arch_supports_gmem_shared_mem(kvm); + #endif + default: + break; +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0018-KVM-selftests-Don-t-use-hardcoded-page-sizes-in-gues.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0018-KVM-selftests-Don-t-use-hardcoded-page-sizes-in-gues.patch new file mode 100644 index 00000000000..6e86f9bc830 --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0018-KVM-selftests-Don-t-use-hardcoded-page-sizes-in-gues.patch @@ -0,0 +1,71 @@ +From 5840c7c2ee0622e1206d29f8c48f469dce0d53ed Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:29 +0100 +Subject: [PATCH 18/42] KVM: selftests: Don't use hardcoded page sizes in + guest_memfd test + +Using hardcoded page size values could cause the test to fail on systems +that have larger pages, e.g., arm64 with 64kB pages. Use getpagesize() +instead. + +Also, build the guest_memfd selftest for arm64. 
+ +Reviewed-by: David Hildenbrand +Suggested-by: Gavin Shan +Reviewed-by: Gavin Shan +Signed-off-by: Fuad Tabba +--- + tools/testing/selftests/kvm/Makefile.kvm | 1 + + tools/testing/selftests/kvm/guest_memfd_test.c | 11 ++++++----- + 2 files changed, 7 insertions(+), 5 deletions(-) + +diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm +index 38b95998e1e6..e11ed9e59ab5 100644 +--- a/tools/testing/selftests/kvm/Makefile.kvm ++++ b/tools/testing/selftests/kvm/Makefile.kvm +@@ -172,6 +172,7 @@ TEST_GEN_PROGS_arm64 += arch_timer + TEST_GEN_PROGS_arm64 += coalesced_io_test + TEST_GEN_PROGS_arm64 += dirty_log_perf_test + TEST_GEN_PROGS_arm64 += get-reg-list ++TEST_GEN_PROGS_arm64 += guest_memfd_test + TEST_GEN_PROGS_arm64 += memslot_modification_stress_test + TEST_GEN_PROGS_arm64 += memslot_perf_test + TEST_GEN_PROGS_arm64 += mmu_stress_test +diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c +index ce687f8d248f..341ba616cf55 100644 +--- a/tools/testing/selftests/kvm/guest_memfd_test.c ++++ b/tools/testing/selftests/kvm/guest_memfd_test.c +@@ -146,24 +146,25 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) + { + int fd1, fd2, ret; + struct stat st1, st2; ++ size_t page_size = getpagesize(); + +- fd1 = __vm_create_guest_memfd(vm, 4096, 0); ++ fd1 = __vm_create_guest_memfd(vm, page_size, 0); + TEST_ASSERT(fd1 != -1, "memfd creation should succeed"); + + ret = fstat(fd1, &st1); + TEST_ASSERT(ret != -1, "memfd fstat should succeed"); +- TEST_ASSERT(st1.st_size == 4096, "memfd st_size should match requested size"); ++ TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size"); + +- fd2 = __vm_create_guest_memfd(vm, 8192, 0); ++ fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0); + TEST_ASSERT(fd2 != -1, "memfd creation should succeed"); + + ret = fstat(fd2, &st2); + TEST_ASSERT(ret != -1, "memfd fstat should succeed"); +- 
TEST_ASSERT(st2.st_size == 8192, "second memfd st_size should match requested size"); ++ TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size"); + + ret = fstat(fd1, &st1); + TEST_ASSERT(ret != -1, "memfd fstat should succeed"); +- TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size"); ++ TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size"); + TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers"); + + close(fd2); +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/05-mmap-support/0019-KVM-selftests-guest_memfd-mmap-test-when-mapping-is-.patch b/resources/hiding_ci/linux_patches/05-mmap-support/0019-KVM-selftests-guest_memfd-mmap-test-when-mapping-is-.patch new file mode 100644 index 00000000000..012c9c9488d --- /dev/null +++ b/resources/hiding_ci/linux_patches/05-mmap-support/0019-KVM-selftests-guest_memfd-mmap-test-when-mapping-is-.patch @@ -0,0 +1,283 @@ +From 79c7f768a4dcdea740277dd1b3dc8c9027e5339a Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Wed, 11 Jun 2025 14:33:30 +0100 +Subject: [PATCH 19/42] KVM: selftests: guest_memfd mmap() test when mapping is + allowed + +Expand the guest_memfd selftests to include testing mapping guest +memory for VM types that support it. 
+ +Reviewed-by: James Houghton +Reviewed-by: Gavin Shan +Co-developed-by: Ackerley Tng +Signed-off-by: Ackerley Tng +Signed-off-by: Fuad Tabba +--- + .../testing/selftests/kvm/guest_memfd_test.c | 201 ++++++++++++++++-- + 1 file changed, 180 insertions(+), 21 deletions(-) + +diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c +index 341ba616cf55..5da2ed6277ac 100644 +--- a/tools/testing/selftests/kvm/guest_memfd_test.c ++++ b/tools/testing/selftests/kvm/guest_memfd_test.c +@@ -13,6 +13,8 @@ + + #include + #include ++#include ++#include + #include + #include + #include +@@ -34,12 +36,83 @@ static void test_file_read_write(int fd) + "pwrite on a guest_mem fd should fail"); + } + +-static void test_mmap(int fd, size_t page_size) ++static void test_mmap_supported(int fd, size_t page_size, size_t total_size) ++{ ++ const char val = 0xaa; ++ char *mem; ++ size_t i; ++ int ret; ++ ++ mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); ++ TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); ++ ++ mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); ++ TEST_ASSERT(mem != MAP_FAILED, "mmap() for shared guest memory should succeed."); ++ ++ memset(mem, val, total_size); ++ for (i = 0; i < total_size; i++) ++ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); ++ ++ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, ++ page_size); ++ TEST_ASSERT(!ret, "fallocate the first page should succeed."); ++ ++ for (i = 0; i < page_size; i++) ++ TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00); ++ for (; i < total_size; i++) ++ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); ++ ++ memset(mem, val, page_size); ++ for (i = 0; i < total_size; i++) ++ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); ++ ++ ret = munmap(mem, total_size); ++ TEST_ASSERT(!ret, "munmap() should succeed."); ++} ++ ++static sigjmp_buf jmpbuf; ++void fault_sigbus_handler(int signum) ++{ ++ siglongjmp(jmpbuf, 1); ++} 
++ ++static void test_fault_overflow(int fd, size_t page_size, size_t total_size) ++{ ++ struct sigaction sa_old, sa_new = { ++ .sa_handler = fault_sigbus_handler, ++ }; ++ size_t map_size = total_size * 4; ++ const char val = 0xaa; ++ char *mem; ++ size_t i; ++ int ret; ++ ++ mem = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); ++ TEST_ASSERT(mem != MAP_FAILED, "mmap() for shared guest memory should succeed."); ++ ++ sigaction(SIGBUS, &sa_new, &sa_old); ++ if (sigsetjmp(jmpbuf, 1) == 0) { ++ memset(mem, 0xaa, map_size); ++ TEST_ASSERT(false, "memset() should have triggered SIGBUS."); ++ } ++ sigaction(SIGBUS, &sa_old, NULL); ++ ++ for (i = 0; i < total_size; i++) ++ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); ++ ++ ret = munmap(mem, map_size); ++ TEST_ASSERT(!ret, "munmap() should succeed."); ++} ++ ++static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size) + { + char *mem; + + mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT_EQ(mem, MAP_FAILED); ++ ++ mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); ++ TEST_ASSERT_EQ(mem, MAP_FAILED); + } + + static void test_file_size(int fd, size_t page_size, size_t total_size) +@@ -120,26 +193,19 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) + } + } + +-static void test_create_guest_memfd_invalid(struct kvm_vm *vm) ++static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm, ++ uint64_t guest_memfd_flags, ++ size_t page_size) + { +- size_t page_size = getpagesize(); +- uint64_t flag; + size_t size; + int fd; + + for (size = 1; size < page_size; size++) { +- fd = __vm_create_guest_memfd(vm, size, 0); +- TEST_ASSERT(fd == -1 && errno == EINVAL, ++ fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags); ++ TEST_ASSERT(fd < 0 && errno == EINVAL, + "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL", + size); + } +- +- for (flag = BIT(0); flag; flag <<= 1) 
{ +- fd = __vm_create_guest_memfd(vm, page_size, flag); +- TEST_ASSERT(fd == -1 && errno == EINVAL, +- "guest_memfd() with flag '0x%lx' should fail with EINVAL", +- flag); +- } + } + + static void test_create_guest_memfd_multiple(struct kvm_vm *vm) +@@ -171,30 +237,123 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) + close(fd1); + } + +-int main(int argc, char *argv[]) ++static bool check_vm_type(unsigned long vm_type) + { +- size_t page_size; ++ /* ++ * Not all architectures support KVM_CAP_VM_TYPES. However, those that ++ * support guest_memfd have that support for the default VM type. ++ */ ++ if (vm_type == VM_TYPE_DEFAULT) ++ return true; ++ ++ return kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(vm_type); ++} ++ ++static void test_with_type(unsigned long vm_type, uint64_t guest_memfd_flags, ++ bool expect_mmap_allowed) ++{ ++ struct kvm_vm *vm; + size_t total_size; ++ size_t page_size; + int fd; +- struct kvm_vm *vm; + +- TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); ++ if (!check_vm_type(vm_type)) ++ return; + + page_size = getpagesize(); + total_size = page_size * 4; + +- vm = vm_create_barebones(); ++ vm = vm_create_barebones_type(vm_type); + +- test_create_guest_memfd_invalid(vm); + test_create_guest_memfd_multiple(vm); ++ test_create_guest_memfd_invalid_sizes(vm, guest_memfd_flags, page_size); + +- fd = vm_create_guest_memfd(vm, total_size, 0); ++ fd = vm_create_guest_memfd(vm, total_size, guest_memfd_flags); + + test_file_read_write(fd); +- test_mmap(fd, page_size); ++ ++ if (expect_mmap_allowed) { ++ test_mmap_supported(fd, page_size, total_size); ++ test_fault_overflow(fd, page_size, total_size); ++ ++ } else { ++ test_mmap_not_supported(fd, page_size, total_size); ++ } ++ + test_file_size(fd, page_size, total_size); + test_fallocate(fd, page_size, total_size); + test_invalid_punch_hole(fd, page_size, total_size); + + close(fd); ++ kvm_vm_free(vm); ++} ++ ++static void test_vm_type_gmem_flag_validity(unsigned long vm_type, ++ uint64_t 
expected_valid_flags) ++{ ++ size_t page_size = getpagesize(); ++ struct kvm_vm *vm; ++ uint64_t flag = 0; ++ int fd; ++ ++ if (!check_vm_type(vm_type)) ++ return; ++ ++ vm = vm_create_barebones_type(vm_type); ++ ++ for (flag = BIT(0); flag; flag <<= 1) { ++ fd = __vm_create_guest_memfd(vm, page_size, flag); ++ ++ if (flag & expected_valid_flags) { ++ TEST_ASSERT(fd >= 0, ++ "guest_memfd() with flag '0x%lx' should be valid", ++ flag); ++ close(fd); ++ } else { ++ TEST_ASSERT(fd < 0 && errno == EINVAL, ++ "guest_memfd() with flag '0x%lx' should fail with EINVAL", ++ flag); ++ } ++ } ++ ++ kvm_vm_free(vm); ++} ++ ++static void test_gmem_flag_validity(void) ++{ ++ uint64_t non_coco_vm_valid_flags = 0; ++ ++ if (kvm_has_cap(KVM_CAP_GMEM_SHARED_MEM)) ++ non_coco_vm_valid_flags = GUEST_MEMFD_FLAG_SUPPORT_SHARED; ++ ++ test_vm_type_gmem_flag_validity(VM_TYPE_DEFAULT, non_coco_vm_valid_flags); ++ ++#ifdef __x86_64__ ++ test_vm_type_gmem_flag_validity(KVM_X86_SW_PROTECTED_VM, non_coco_vm_valid_flags); ++ test_vm_type_gmem_flag_validity(KVM_X86_SEV_VM, 0); ++ test_vm_type_gmem_flag_validity(KVM_X86_SEV_ES_VM, 0); ++ test_vm_type_gmem_flag_validity(KVM_X86_SNP_VM, 0); ++ test_vm_type_gmem_flag_validity(KVM_X86_TDX_VM, 0); ++#endif ++} ++ ++int main(int argc, char *argv[]) ++{ ++ TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); ++ ++ test_gmem_flag_validity(); ++ ++ test_with_type(VM_TYPE_DEFAULT, 0, false); ++ if (kvm_has_cap(KVM_CAP_GMEM_SHARED_MEM)) { ++ test_with_type(VM_TYPE_DEFAULT, GUEST_MEMFD_FLAG_SUPPORT_SHARED, ++ true); ++ } ++ ++#ifdef __x86_64__ ++ test_with_type(KVM_X86_SW_PROTECTED_VM, 0, false); ++ if (kvm_has_cap(KVM_CAP_GMEM_SHARED_MEM)) { ++ test_with_type(KVM_X86_SW_PROTECTED_VM, ++ GUEST_MEMFD_FLAG_SUPPORT_SHARED, true); ++ } ++#endif + } +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0011-KVM-guest_memfd-Add-flag-to-remove-from-direct-map.patch 
b/resources/hiding_ci/linux_patches/10-direct-map-removal/0011-KVM-guest_memfd-Add-flag-to-remove-from-direct-map.patch deleted file mode 100644 index dcce661a60e..00000000000 --- a/resources/hiding_ci/linux_patches/10-direct-map-removal/0011-KVM-guest_memfd-Add-flag-to-remove-from-direct-map.patch +++ /dev/null @@ -1,178 +0,0 @@ -From b1fc478976c93fd42b14e06d2de57e121be03142 Mon Sep 17 00:00:00 2001 -From: Patrick Roy -Date: Fri, 7 Feb 2025 14:33:01 +0000 -Subject: [PATCH 11/26] KVM: guest_memfd: Add flag to remove from direct map - -Add KVM_GMEM_NO_DIRECT_MAP flag for KVM_CREATE_GUEST_MEMFD() ioctl. When -set, guest_memfd folios will be removed from the direct map after -preparation, with direct map entries only restored when the folios are -freed. - -To ensure these folios do not end up in places where the kernel cannot -deal with them, set AS_NO_DIRECT_MAP on the guest_memfd's struct -address_space if KVM_GMEM_NO_DIRECT_MAP is requested. - -Add KVM_CAP_GMEM_NO_DIRECT_MAP to let userspace discover whether -guest_memfd supports KVM_GMEM_NO_DIRECT_MAP. Support depends on -guest_memfd itself being supported, but also on whether KVM can -manipulate the direct map at page granularity at all (possible most of -the time, just arm64 is a notable outlier where its impossible if the -direct map has been setup using hugepages, as arm64 cannot break these -apart due to break-before-make semantics). - -Note that this flag causes removal of direct map entries for all -guest_memfd folios independent of whether they are "shared" or "private" -(although current guest_memfd only supports either all folios in the -"shared" state, or all folios in the "private" state if -!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM)). 
The usecase for removing -direct map entries of also the shared parts of guest_memfd are a special -type of non-CoCo VM where, host userspace is trusted to have access to -all of guest memory, but where Spectre-style transient execution attacks -through the host kernel's direct map should still be mitigated. - -Note that KVM retains access to guest memory via userspace -mappings of guest_memfd, which are reflected back into KVM's memslots -via userspace_addr. This is needed for things like MMIO emulation on -x86_64 to work. Previous iterations attempted to instead have KVM -temporarily restore direct map entries whenever such an access to guest -memory was needed, but this turned out to have a significant performance -impact, as well as additional complexity due to needing to refcount -direct map reinsertion operations and making them play nicely with gmem -truncations. - -This iteration also doesn't have KVM perform TLB flushes after direct -map manipulations. This is because TLB flushes resulted in a up to 40x -elongation of page faults in guest_memfd (scaling with the number of CPU -cores), or a 5x elongation of memory population. On the one hand, TLB -flushes are not needed for functional correctness (the virt->phys -mapping technically stays "correct", the kernel should simply to not it -for a while), so this is a correct optimization to make. On the other -hand, it means that the desired protection from Spectre-style attacks is -not perfect, as an attacker could try to prevent a stale TLB entry from -getting evicted, keeping it alive until the page it refers to is used by -the guest for some sensitive data, and then targeting it using a -spectre-gadget. 
- -Signed-off-by: Patrick Roy ---- - include/uapi/linux/kvm.h | 3 +++ - virt/kvm/guest_memfd.c | 28 +++++++++++++++++++++++++++- - virt/kvm/kvm_main.c | 5 +++++ - 3 files changed, 35 insertions(+), 1 deletion(-) - -diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h -index 117937a895da..fb02a93546d8 100644 ---- a/include/uapi/linux/kvm.h -+++ b/include/uapi/linux/kvm.h -@@ -930,6 +930,7 @@ struct kvm_enable_cap { - #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 - #define KVM_CAP_X86_GUEST_MODE 238 - #define KVM_CAP_GMEM_SHARED_MEM 239 -+#define KVM_CAP_GMEM_NO_DIRECT_MAP 240 - - struct kvm_irq_routing_irqchip { - __u32 irqchip; -@@ -1573,6 +1574,8 @@ struct kvm_create_guest_memfd { - __u64 reserved[6]; - }; - -+#define KVM_GMEM_NO_DIRECT_MAP (1ULL << 0) -+ - #define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory) - - struct kvm_pre_fault_memory { -diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c -index fbf89e643add..a2b96bc51391 100644 ---- a/virt/kvm/guest_memfd.c -+++ b/virt/kvm/guest_memfd.c -@@ -4,6 +4,7 @@ - #include - #include - #include -+#include - - #include "kvm_mm.h" - -@@ -50,8 +51,23 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo - return 0; - } - -+static bool kvm_gmem_test_no_direct_map(struct inode *inode) -+{ -+ return ((unsigned long) inode->i_private) & KVM_GMEM_NO_DIRECT_MAP; -+} -+ - static inline void kvm_gmem_mark_prepared(struct folio *folio) - { -+ struct inode *inode = folio_inode(folio); -+ -+ if (kvm_gmem_test_no_direct_map(inode)) { -+ int r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio), -+ false); -+ -+ if (!r) -+ folio_set_private(folio); -+ } -+ - folio_mark_uptodate(folio); - } - -@@ -478,6 +494,10 @@ static void kvm_gmem_free_folio(struct folio *folio) - kvm_pfn_t pfn = page_to_pfn(page); - int order = folio_order(folio); - -+ if (folio_test_private(folio)) -+ WARN_ON_ONCE(set_direct_map_valid_noflush(folio_page(folio, 0), -+ 
folio_nr_pages(folio), true)); -+ - kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order)); - } - #endif -@@ -551,6 +571,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) - /* Unmovable mappings are supposed to be marked unevictable as well. */ - WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping)); - -+ if (flags & KVM_GMEM_NO_DIRECT_MAP) -+ mapping_set_no_direct_map(inode->i_mapping); -+ - kvm_get_kvm(kvm); - gmem->kvm = kvm; - xa_init(&gmem->bindings); -@@ -570,7 +593,10 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) - { - loff_t size = args->size; - u64 flags = args->flags; -- u64 valid_flags = 0; -+ u64 valid_flags = KVM_GMEM_NO_DIRECT_MAP; -+ -+ if (!can_set_direct_map()) -+ valid_flags &= ~KVM_GMEM_NO_DIRECT_MAP; - - if (flags & ~valid_flags) - return -EINVAL; -diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index 3e40acb9f5c0..32ca1c921ab0 100644 ---- a/virt/kvm/kvm_main.c -+++ b/virt/kvm/kvm_main.c -@@ -65,6 +65,7 @@ - #include - - #include -+#include - - - /* Worst case buffer size needed for holding an integer. 
*/ -@@ -4823,6 +4824,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) - return kvm_supported_mem_attributes(kvm); - #endif - #ifdef CONFIG_KVM_PRIVATE_MEM -+ case KVM_CAP_GMEM_NO_DIRECT_MAP: -+ if (!can_set_direct_map()) -+ return false; -+ fallthrough; - case KVM_CAP_GUEST_MEMFD: - return !kvm || kvm_arch_has_private_mem(kvm); - #endif --- -2.47.1 - diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0012-fixup-for-direct-map-removal-v4.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0012-fixup-for-direct-map-removal-v4.patch deleted file mode 100644 index c54565134f1..00000000000 --- a/resources/hiding_ci/linux_patches/10-direct-map-removal/0012-fixup-for-direct-map-removal-v4.patch +++ /dev/null @@ -1,51 +0,0 @@ -From ab44b2d5bfb7ef9b7bbb156d493f49a4bbebf014 Mon Sep 17 00:00:00 2001 -From: Nikita Kalyazin -Date: Thu, 10 Apr 2025 14:18:39 +0000 -Subject: [PATCH 12/26] fixup for direct map removal v4 - -Do not make kvm_gmem_free_folio dependent on -CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE . 
---- - virt/kvm/guest_memfd.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c -index a2b96bc51391..291d647a5c80 100644 ---- a/virt/kvm/guest_memfd.c -+++ b/virt/kvm/guest_memfd.c -@@ -487,28 +487,28 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol - return MF_DELAYED; - } - --#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE - static void kvm_gmem_free_folio(struct folio *folio) - { -+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE - struct page *page = folio_page(folio, 0); - kvm_pfn_t pfn = page_to_pfn(page); - int order = folio_order(folio); -+#endif - - if (folio_test_private(folio)) - WARN_ON_ONCE(set_direct_map_valid_noflush(folio_page(folio, 0), - folio_nr_pages(folio), true)); - -+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE - kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order)); --} - #endif -+} - - static const struct address_space_operations kvm_gmem_aops = { - .dirty_folio = noop_dirty_folio, - .migrate_folio = kvm_gmem_migrate_folio, - .error_remove_folio = kvm_gmem_error_folio, --#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE - .free_folio = kvm_gmem_free_folio, --#endif - }; - - static int kvm_gmem_getattr(struct mnt_idmap *idmap, const struct path *path, --- -2.47.1 - diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0020-filemap-Pass-address_space-mapping-to-free_folio.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0020-filemap-Pass-address_space-mapping-to-free_folio.patch new file mode 100644 index 00000000000..a3f04233a99 --- /dev/null +++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0020-filemap-Pass-address_space-mapping-to-free_folio.patch @@ -0,0 +1,214 @@ +From 98800ece9d818fa9cf08a16c16cd62bc472b569f Mon Sep 17 00:00:00 2001 +From: Elliot Berman +Date: Fri, 22 Nov 2024 09:29:38 -0800 +Subject: [PATCH 20/42] filemap: Pass address_space mapping to ->free_folio() + +When guest_memfd removes memory 
from the host kernel's direct map, +direct map entries must be restored before the memory is freed again. To +do so, ->free_folio() needs to know whether a gmem folio was direct map +removed in the first place though. While possible to keep track of this +information on each individual folio (e.g. via page flags), direct map +removal is an all-or-nothing property of the entire guest_memfd, so it +is less error prone to just check the flag stored in the gmem inode's +private data. However, by the time ->free_folio() is called, +folio->mapping might be cleared. To still allow access to the address +space from which the folio was just removed, pass it in as an additional +argument to ->free_folio, as the mapping is well-known to all callers. + +Link: https://lore.kernel.org/all/15f665b4-2d33-41ca-ac50-fafe24ade32f@redhat.com/ +Suggested-by: David Hildenbrand +Acked-by: David Hildenbrand +Signed-off-by: Elliot Berman +[patrick: rewrite shortlog for new usecase] +Signed-off-by: Patrick Roy +--- + Documentation/filesystems/locking.rst | 2 +- + fs/nfs/dir.c | 11 ++++++----- + fs/orangefs/inode.c | 3 ++- + include/linux/fs.h | 2 +- + mm/filemap.c | 9 +++++---- + mm/secretmem.c | 3 ++- + mm/vmscan.c | 4 ++-- + virt/kvm/guest_memfd.c | 3 ++- + 8 files changed, 21 insertions(+), 16 deletions(-) + +diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst +index 2e567e341c3b..21373864e6c2 100644 +--- a/Documentation/filesystems/locking.rst ++++ b/Documentation/filesystems/locking.rst +@@ -262,7 +262,7 @@ prototypes:: + sector_t (*bmap)(struct address_space *, sector_t); + void (*invalidate_folio) (struct folio *, size_t start, size_t len); + bool (*release_folio)(struct folio *, gfp_t); +- void (*free_folio)(struct folio *); ++ void (*free_folio)(struct address_space *, struct folio *); + int (*direct_IO)(struct kiocb *, struct iov_iter *iter); + int (*migrate_folio)(struct address_space *, struct folio *dst, + struct folio *src, enum 
migrate_mode); +diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c +index d0e0b435a843..5cb338f0d3a2 100644 +--- a/fs/nfs/dir.c ++++ b/fs/nfs/dir.c +@@ -55,7 +55,7 @@ static int nfs_closedir(struct inode *, struct file *); + static int nfs_readdir(struct file *, struct dir_context *); + static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); + static loff_t nfs_llseek_dir(struct file *, loff_t, int); +-static void nfs_readdir_clear_array(struct folio *); ++static void nfs_readdir_clear_array(struct address_space *, struct folio *); + static int nfs_do_create(struct inode *dir, struct dentry *dentry, + umode_t mode, int open_flags); + +@@ -218,7 +218,8 @@ static void nfs_readdir_folio_init_array(struct folio *folio, u64 last_cookie, + /* + * we are freeing strings created by nfs_add_to_readdir_array() + */ +-static void nfs_readdir_clear_array(struct folio *folio) ++static void nfs_readdir_clear_array(struct address_space *mapping, ++ struct folio *folio) + { + struct nfs_cache_array *array; + unsigned int i; +@@ -233,7 +234,7 @@ static void nfs_readdir_clear_array(struct folio *folio) + static void nfs_readdir_folio_reinit_array(struct folio *folio, u64 last_cookie, + u64 change_attr) + { +- nfs_readdir_clear_array(folio); ++ nfs_readdir_clear_array(folio->mapping, folio); + nfs_readdir_folio_init_array(folio, last_cookie, change_attr); + } + +@@ -249,7 +250,7 @@ nfs_readdir_folio_array_alloc(u64 last_cookie, gfp_t gfp_flags) + static void nfs_readdir_folio_array_free(struct folio *folio) + { + if (folio) { +- nfs_readdir_clear_array(folio); ++ nfs_readdir_clear_array(folio->mapping, folio); + folio_put(folio); + } + } +@@ -391,7 +392,7 @@ static void nfs_readdir_folio_init_and_validate(struct folio *folio, u64 cookie, + if (folio_test_uptodate(folio)) { + if (nfs_readdir_folio_validate(folio, cookie, change_attr)) + return; +- nfs_readdir_clear_array(folio); ++ nfs_readdir_clear_array(folio->mapping, folio); + } + nfs_readdir_folio_init_array(folio, cookie, 
change_attr); + folio_mark_uptodate(folio); +diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c +index 08a6f372a352..14ac9ffc4431 100644 +--- a/fs/orangefs/inode.c ++++ b/fs/orangefs/inode.c +@@ -450,7 +450,8 @@ static bool orangefs_release_folio(struct folio *folio, gfp_t foo) + return !folio_test_private(folio); + } + +-static void orangefs_free_folio(struct folio *folio) ++static void orangefs_free_folio(struct address_space *mapping, ++ struct folio *folio) + { + kfree(folio_detach_private(folio)); + } +diff --git a/include/linux/fs.h b/include/linux/fs.h +index b085f161ed22..218d0d620633 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -457,7 +457,7 @@ struct address_space_operations { + sector_t (*bmap)(struct address_space *, sector_t); + void (*invalidate_folio) (struct folio *, size_t offset, size_t len); + bool (*release_folio)(struct folio *, gfp_t); +- void (*free_folio)(struct folio *folio); ++ void (*free_folio)(struct address_space *, struct folio *folio); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); + /* + * migrate the contents of a folio to the specified target. 
If +diff --git a/mm/filemap.c b/mm/filemap.c +index bada249b9fb7..6af53c5096fc 100644 +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -226,11 +226,11 @@ void __filemap_remove_folio(struct folio *folio, void *shadow) + + void filemap_free_folio(struct address_space *mapping, struct folio *folio) + { +- void (*free_folio)(struct folio *); ++ void (*free_folio)(struct address_space *, struct folio *); + + free_folio = mapping->a_ops->free_folio; + if (free_folio) +- free_folio(folio); ++ free_folio(mapping, folio); + + folio_put_refs(folio, folio_nr_pages(folio)); + } +@@ -820,7 +820,8 @@ EXPORT_SYMBOL(file_write_and_wait_range); + void replace_page_cache_folio(struct folio *old, struct folio *new) + { + struct address_space *mapping = old->mapping; +- void (*free_folio)(struct folio *) = mapping->a_ops->free_folio; ++ void (*free_folio)(struct address_space *, struct folio *) = ++ mapping->a_ops->free_folio; + pgoff_t offset = old->index; + XA_STATE(xas, &mapping->i_pages, offset); + +@@ -849,7 +850,7 @@ void replace_page_cache_folio(struct folio *old, struct folio *new) + __lruvec_stat_add_folio(new, NR_SHMEM); + xas_unlock_irq(&xas); + if (free_folio) +- free_folio(old); ++ free_folio(mapping, old); + folio_put(old); + } + EXPORT_SYMBOL_GPL(replace_page_cache_folio); +diff --git a/mm/secretmem.c b/mm/secretmem.c +index 589b26c2d553..6eb3bbe34d08 100644 +--- a/mm/secretmem.c ++++ b/mm/secretmem.c +@@ -152,7 +152,8 @@ static int secretmem_migrate_folio(struct address_space *mapping, + return -EBUSY; + } + +-static void secretmem_free_folio(struct folio *folio) ++static void secretmem_free_folio(struct address_space *mapping, ++ struct folio *folio) + { + set_direct_map_default_noflush(&folio->page); + folio_zero_segment(folio, 0, folio_size(folio)); +diff --git a/mm/vmscan.c b/mm/vmscan.c +index f8dfd2864bbf..e23e1a44b92c 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -797,7 +797,7 @@ static int __remove_mapping(struct address_space *mapping, struct folio *folio, + 
xa_unlock_irq(&mapping->i_pages); + put_swap_folio(folio, swap); + } else { +- void (*free_folio)(struct folio *); ++ void (*free_folio)(struct address_space *, struct folio *); + + free_folio = mapping->a_ops->free_folio; + /* +@@ -826,7 +826,7 @@ static int __remove_mapping(struct address_space *mapping, struct folio *folio, + spin_unlock(&mapping->host->i_lock); + + if (free_folio) +- free_folio(folio); ++ free_folio(mapping, folio); + } + + return 1; +diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c +index ebdb2d8bf57a..dfb799d0cead 100644 +--- a/virt/kvm/guest_memfd.c ++++ b/virt/kvm/guest_memfd.c +@@ -433,7 +433,8 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol + } + + #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE +-static void kvm_gmem_free_folio(struct folio *folio) ++static void kvm_gmem_free_folio(struct address_space *mapping, ++ struct folio *folio) + { + struct page *page = folio_page(folio, 0); + kvm_pfn_t pfn = page_to_pfn(page); +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0021-arch-export-set_direct_map_valid_noflush-to-KVM-modu.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0021-arch-export-set_direct_map_valid_noflush-to-KVM-modu.patch new file mode 100644 index 00000000000..d300a1d7d00 --- /dev/null +++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0021-arch-export-set_direct_map_valid_noflush-to-KVM-modu.patch @@ -0,0 +1,85 @@ +From 6d909fb5883da3a6b752d28438d2497637b8e9db Mon Sep 17 00:00:00 2001 +From: Patrick Roy +Date: Mon, 2 Jun 2025 12:06:10 +0100 +Subject: [PATCH 21/42] arch: export set_direct_map_valid_noflush to KVM module + +Use the new per-module export functionality to allow KVM (and only KVM) +access to set_direct_map_valid_noflush(). This allows guest_memfd to +remove its memory from the direct map, even if KVM is built as a module. 
+ +Direct map removal gives guest_memfd the same protection that +memfd_secret enjoys, such as hardening against Spectre-like attacks +through in-kernel gadgets. + +Signed-off-by: Patrick Roy +--- + arch/arm64/mm/pageattr.c | 1 + + arch/loongarch/mm/pageattr.c | 1 + + arch/riscv/mm/pageattr.c | 1 + + arch/s390/mm/pageattr.c | 1 + + arch/x86/mm/pat/set_memory.c | 1 + + 5 files changed, 5 insertions(+) + +diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c +index 04d4a8f676db..ff454bc6e9a2 100644 +--- a/arch/arm64/mm/pageattr.c ++++ b/arch/arm64/mm/pageattr.c +@@ -291,6 +291,7 @@ int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) + + return set_memory_valid(addr, nr, valid); + } ++EXPORT_SYMBOL_GPL_FOR_MODULES(set_direct_map_valid_noflush, "kvm"); + + #ifdef CONFIG_DEBUG_PAGEALLOC + /* +diff --git a/arch/loongarch/mm/pageattr.c b/arch/loongarch/mm/pageattr.c +index 99165903908a..43c1a873a469 100644 +--- a/arch/loongarch/mm/pageattr.c ++++ b/arch/loongarch/mm/pageattr.c +@@ -217,6 +217,7 @@ int set_direct_map_invalid_noflush(struct page *page) + + return __set_memory(addr, 1, __pgprot(0), __pgprot(_PAGE_PRESENT | _PAGE_VALID)); + } ++EXPORT_SYMBOL_GPL_FOR_MODULES(set_direct_map_valid_noflush, "kvm"); + + int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) + { +diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c +index d815448758a1..3a1627e0eeb4 100644 +--- a/arch/riscv/mm/pageattr.c ++++ b/arch/riscv/mm/pageattr.c +@@ -400,6 +400,7 @@ int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) + + return __set_memory((unsigned long)page_address(page), nr, set, clear); + } ++EXPORT_SYMBOL_GPL_FOR_MODULES(set_direct_map_valid_noflush, "kvm"); + + #ifdef CONFIG_DEBUG_PAGEALLOC + static int debug_pagealloc_set_page(pte_t *pte, unsigned long addr, void *data) +diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c +index 348e759840e7..392ce9194f86 100644 +--- 
a/arch/s390/mm/pageattr.c ++++ b/arch/s390/mm/pageattr.c +@@ -413,6 +413,7 @@ int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) + + return __set_memory((unsigned long)page_to_virt(page), nr, flags); + } ++EXPORT_SYMBOL_GPL_FOR_MODULES(set_direct_map_valid_noflush, "kvm"); + + bool kernel_page_present(struct page *page) + { +diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c +index 8834c76f91c9..ab469de18c4d 100644 +--- a/arch/x86/mm/pat/set_memory.c ++++ b/arch/x86/mm/pat/set_memory.c +@@ -2661,6 +2661,7 @@ int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) + + return __set_pages_np(page, nr); + } ++EXPORT_SYMBOL_GPL_FOR_MODULES(set_direct_map_valid_noflush, "kvm"); + + #ifdef CONFIG_DEBUG_PAGEALLOC + void __kernel_map_pages(struct page *page, int numpages, int enable) +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0010-mm-introduce-AS_NO_DIRECT_MAP.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0022-mm-introduce-AS_NO_DIRECT_MAP.patch similarity index 83% rename from resources/hiding_ci/linux_patches/10-direct-map-removal/0010-mm-introduce-AS_NO_DIRECT_MAP.patch rename to resources/hiding_ci/linux_patches/10-direct-map-removal/0022-mm-introduce-AS_NO_DIRECT_MAP.patch index bd336166268..f7ca3c23a73 100644 --- a/resources/hiding_ci/linux_patches/10-direct-map-removal/0010-mm-introduce-AS_NO_DIRECT_MAP.patch +++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0022-mm-introduce-AS_NO_DIRECT_MAP.patch @@ -1,7 +1,7 @@ -From 22ec89c0ff7af3430027cf71cf8bce5c8ed6e402 Mon Sep 17 00:00:00 2001 +From 958156f418ccaac8c55d7e290c4eae03fc2792c3 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 7 Feb 2025 11:16:06 +0000 -Subject: [PATCH 10/26] mm: introduce AS_NO_DIRECT_MAP +Subject: [PATCH 22/42] mm: introduce AS_NO_DIRECT_MAP Add AS_NO_DIRECT_MAP for mappings where direct map entries of folios are set to not present . 
Currently, mappings that match this description are @@ -38,19 +38,19 @@ Signed-off-by: Patrick Roy 6 files changed, 23 insertions(+), 37 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h -index 47bfc6b1b632..903b41e89cf8 100644 +index e63fbfbd5b0f..d7407dde2b61 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h -@@ -210,6 +210,7 @@ enum mapping_flags { - AS_STABLE_WRITES = 7, /* must wait for writeback before modifying +@@ -211,6 +211,7 @@ enum mapping_flags { folio contents */ AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ -+ AS_NO_DIRECT_MAP = 9, /* Folios in the mapping are not in the direct map */ + AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9, ++ AS_NO_DIRECT_MAP = 10, /* Folios in the mapping are not in the direct map */ /* Bits 16-25 are used for FOLIO_ORDER */ AS_FOLIO_ORDER_BITS = 5, AS_FOLIO_ORDER_MIN = 16, -@@ -335,6 +336,21 @@ static inline bool mapping_inaccessible(struct address_space *mapping) - return test_bit(AS_INACCESSIBLE, &mapping->flags); +@@ -346,6 +347,21 @@ static inline bool mapping_writeback_may_deadlock_on_reclaim(struct address_spac + return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); } +static inline void mapping_set_no_direct_map(struct address_space *mapping) @@ -120,11 +120,11 @@ index c4b0f376fb34..33f173a607ad 100644 r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT); diff --git a/mm/gup.c b/mm/gup.c -index 3883b307780e..b1483a876740 100644 +index e065a49842a8..0f0af9f23324 100644 --- a/mm/gup.c +++ b/mm/gup.c -@@ -1283,7 +1283,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) - if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma)) +@@ -1276,7 +1276,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) + if ((gup_flags & FOLL_SPLIT_PMD) && is_vm_hugetlb_page(vma)) return -EOPNOTSUPP; - if (vma_is_secretmem(vma)) @@ -132,7 +132,7 @@ index 
3883b307780e..b1483a876740 100644 return -EFAULT; if (write) { -@@ -2786,7 +2786,6 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags) +@@ -2769,7 +2769,6 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags) { bool reject_file_backed = false; struct address_space *mapping; @@ -140,7 +140,7 @@ index 3883b307780e..b1483a876740 100644 unsigned long mapping_flags; /* -@@ -2798,14 +2797,6 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags) +@@ -2781,14 +2780,6 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags) reject_file_backed = true; /* We hold a folio reference, so we can safely access folio fields. */ @@ -155,7 +155,7 @@ index 3883b307780e..b1483a876740 100644 if (WARN_ON_ONCE(folio_test_slab(folio))) return false; -@@ -2847,8 +2838,9 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags) +@@ -2830,8 +2821,9 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags) * At this point, we know the mapping is non-null and points to an * address_space object. 
*/ @@ -167,10 +167,10 @@ index 3883b307780e..b1483a876740 100644 return !reject_file_backed || shmem_mapping(mapping); } diff --git a/mm/mlock.c b/mm/mlock.c -index cde076fa7d5e..a43f308be70d 100644 +index 3cb72b579ffd..6cde2a5073f0 100644 --- a/mm/mlock.c +++ b/mm/mlock.c -@@ -474,7 +474,7 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, +@@ -476,7 +476,7 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, if (newflags == oldflags || (oldflags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || @@ -180,10 +180,10 @@ index cde076fa7d5e..a43f308be70d 100644 goto out; diff --git a/mm/secretmem.c b/mm/secretmem.c -index 1b0a214ee558..ea4c04d469b1 100644 +index 6eb3bbe34d08..18986567fc0e 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c -@@ -136,11 +136,6 @@ static int secretmem_mmap(struct file *file, struct vm_area_struct *vma) +@@ -136,11 +136,6 @@ static int secretmem_mmap_prepare(struct vm_area_desc *desc) return 0; } @@ -194,8 +194,8 @@ index 1b0a214ee558..ea4c04d469b1 100644 - static const struct file_operations secretmem_fops = { .release = secretmem_release, - .mmap = secretmem_mmap, -@@ -214,6 +209,7 @@ static struct file *secretmem_file_create(unsigned long flags) + .mmap_prepare = secretmem_mmap_prepare, +@@ -215,6 +210,7 @@ static struct file *secretmem_file_create(unsigned long flags) mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); mapping_set_unevictable(inode->i_mapping); @@ -204,5 +204,5 @@ index 1b0a214ee558..ea4c04d469b1 100644 inode->i_op = &secretmem_iops; inode->i_mapping->a_ops = &secretmem_aops; -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0023-KVM-guest_memfd-Add-flag-to-remove-from-direct-map.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0023-KVM-guest_memfd-Add-flag-to-remove-from-direct-map.patch new file mode 100644 index 00000000000..52738b7c57e --- /dev/null +++ 
b/resources/hiding_ci/linux_patches/10-direct-map-removal/0023-KVM-guest_memfd-Add-flag-to-remove-from-direct-map.patch @@ -0,0 +1,242 @@ +From 469cd9a1a5380aa110b6e20a7efab5ae8762d7d3 Mon Sep 17 00:00:00 2001 +From: Patrick Roy +Date: Fri, 7 Feb 2025 14:33:01 +0000 +Subject: [PATCH 23/42] KVM: guest_memfd: Add flag to remove from direct map + +Add GUEST_MEMFD_FLAG_NO_DIRECT_MAP flag for KVM_CREATE_GUEST_MEMFD() ioctl. When +set, guest_memfd folios will be removed from the direct map after +preparation, with direct map entries only restored when the folios are +freed. + +To ensure these folios do not end up in places where the kernel cannot +deal with them, set AS_NO_DIRECT_MAP on the guest_memfd's struct +address_space if GUEST_MEMFD_FLAG_NO_DIRECT_MAP is requested. + +Add KVM_CAP_GMEM_NO_DIRECT_MAP to let userspace discover whether +guest_memfd supports GUEST_MEMFD_FLAG_NO_DIRECT_MAP. Support depends on +guest_memfd itself being supported, but also on whether KVM can +manipulate the direct map at page granularity at all (possible most of +the time, just arm64 is a notable outlier where it's impossible if the +direct map has been set up using hugepages, as arm64 cannot break these +apart due to break-before-make semantics). + +Note that this flag causes removal of direct map entries for all +guest_memfd folios independent of whether they are "shared" or "private" +(although current guest_memfd only supports either all folios in the +"shared" state, or all folios in the "private" state if +!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM)). The usecase for removing +direct map entries of also the shared parts of guest_memfd is a special +type of non-CoCo VM where host userspace is trusted to have access to +all of guest memory, but where Spectre-style transient execution attacks +through the host kernel's direct map should still be mitigated. In this +setup, KVM retains access to guest memory via userspace mappings of +guest_memfd, which are reflected back into KVM's memslots via +userspace_addr. 
This is needed for things like MMIO emulation on x86_64 +to work. + +Do not perform TLB flushes after direct map manipulations. This is +because TLB flushes resulted in an up to 40x elongation of page faults in +guest_memfd (scaling with the number of CPU cores), or a 5x elongation +of memory population. TLB flushes are not needed for functional +correctness (the virt->phys mapping technically stays "correct", the +kernel should simply not use it for a while). On the other hand, it means +that the desired protection from Spectre-style attacks is not perfect, +as an attacker could try to prevent a stale TLB entry from getting +evicted, keeping it alive until the page it refers to is used by the +guest for some sensitive data, and then targeting it using a +spectre-gadget. + +Signed-off-by: Patrick Roy +--- + arch/arm64/include/asm/kvm_host.h | 12 +++++++++++- + include/linux/kvm_host.h | 7 +++++++ + include/uapi/linux/kvm.h | 2 ++ + virt/kvm/guest_memfd.c | 29 +++++++++++++++++++++++++---- + virt/kvm/kvm_main.c | 5 +++++ + 5 files changed, 50 insertions(+), 5 deletions(-) + +diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h +index 678e7b93bb01..27490e8ae9f6 100644 +--- a/arch/arm64/include/asm/kvm_host.h ++++ b/arch/arm64/include/asm/kvm_host.h +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1678,6 +1679,15 @@ void check_feature_map(void); + #ifdef CONFIG_KVM_GMEM + #define kvm_arch_supports_gmem(kvm) true + #define kvm_arch_supports_gmem_shared_mem(kvm) IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM) +-#endif ++ ++static inline bool kvm_arch_gmem_supports_no_direct_map(void) { ++ /* ++ * Without FWB, direct map access is needed in kvm_pgtable_stage2_map(), ++ * as it calls dcache_clean_inval_poc(). 
++ */ ++ return can_set_direct_map() && cpus_have_final_cap(ARM64_HAS_STAGE2_FWB); ++} ++#define kvm_arch_gmem_supports_no_direct_map kvm_arch_gmem_supports_no_direct_map ++#endif /* CONFIG_KVM_GMEM */ + + #endif /* __ARM64_KVM_HOST_H__ */ +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 58d7761c2a90..1ad1d7f4ac6e 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -743,6 +744,12 @@ static inline bool kvm_arch_supports_gmem_shared_mem(struct kvm *kvm) + } + #endif + ++#ifdef CONFIG_KVM_GMEM ++#ifndef kvm_arch_gmem_supports_no_direct_map ++#define kvm_arch_gmem_supports_no_direct_map can_set_direct_map ++#endif ++#endif /* CONFIG_KVM_GMEM */ ++ + #ifndef kvm_arch_has_readonly_mem + static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) + { +diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h +index 4fba730ec62b..cfb99bfd496d 100644 +--- a/include/uapi/linux/kvm.h ++++ b/include/uapi/linux/kvm.h +@@ -957,6 +957,7 @@ struct kvm_enable_cap { + #define KVM_CAP_ARM_EL2_E2H0 241 + #define KVM_CAP_RISCV_MP_STATE_RESET 242 + #define KVM_CAP_GMEM_SHARED_MEM 243 ++#define KVM_CAP_GMEM_NO_DIRECT_MAP 244 + + struct kvm_irq_routing_irqchip { + __u32 irqchip; +@@ -1594,6 +1595,7 @@ struct kvm_memory_attributes { + + #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) + #define GUEST_MEMFD_FLAG_SUPPORT_SHARED (1ULL << 0) ++#define GUEST_MEMFD_FLAG_NO_DIRECT_MAP (1ULL << 1) + + struct kvm_create_guest_memfd { + __u64 size; +diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c +index dfb799d0cead..99e1b20a9977 100644 +--- a/virt/kvm/guest_memfd.c ++++ b/virt/kvm/guest_memfd.c +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + + #include "kvm_mm.h" + +@@ -42,8 +43,18 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo + return 0; + } + ++static bool 
kvm_gmem_test_no_direct_map(struct inode *inode) ++{ ++ return ((unsigned long) inode->i_private) & GUEST_MEMFD_FLAG_NO_DIRECT_MAP; ++} ++ + static inline void kvm_gmem_mark_prepared(struct folio *folio) + { ++ struct inode *inode = folio_inode(folio); ++ ++ if (kvm_gmem_test_no_direct_map(inode)) ++ set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio), false); ++ + folio_mark_uptodate(folio); + } + +@@ -432,25 +443,29 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol + return MF_DELAYED; + } + +-#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE + static void kvm_gmem_free_folio(struct address_space *mapping, + struct folio *folio) + { + struct page *page = folio_page(folio, 0); ++ ++#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE + kvm_pfn_t pfn = page_to_pfn(page); + int order = folio_order(folio); ++#endif + ++ if (kvm_gmem_test_no_direct_map(mapping->host)) ++ WARN_ON_ONCE(set_direct_map_valid_noflush(page, folio_nr_pages(folio), true)); ++ ++#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE + kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order)); +-} + #endif ++} + + static const struct address_space_operations kvm_gmem_aops = { + .dirty_folio = noop_dirty_folio, + .migrate_folio = kvm_gmem_migrate_folio, + .error_remove_folio = kvm_gmem_error_folio, +-#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE + .free_folio = kvm_gmem_free_folio, +-#endif + }; + + static int kvm_gmem_getattr(struct mnt_idmap *idmap, const struct path *path, +@@ -513,6 +528,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) + /* Unmovable mappings are supposed to be marked unevictable as well. 
*/ + WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping)); + ++ if (flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP) ++ mapping_set_no_direct_map(inode->i_mapping); ++ + kvm_get_kvm(kvm); + gmem->kvm = kvm; + xa_init(&gmem->bindings); +@@ -537,6 +555,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) + if (kvm_arch_supports_gmem_shared_mem(kvm)) + valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED; + ++ if (kvm_arch_gmem_supports_no_direct_map()) ++ valid_flags |= GUEST_MEMFD_FLAG_NO_DIRECT_MAP; ++ + if (flags & ~valid_flags) + return -EINVAL; + +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 441c9b53b876..d99d820a5a29 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -65,6 +65,7 @@ + #include + + #include ++#include + + + /* Worst case buffer size needed for holding an integer. */ +@@ -4911,6 +4912,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) + return kvm_supported_mem_attributes(kvm); + #endif + #ifdef CONFIG_KVM_GMEM ++ case KVM_CAP_GMEM_NO_DIRECT_MAP: ++ if (!can_set_direct_map()) ++ return false; ++ fallthrough; + case KVM_CAP_GUEST_MEMFD: + return !kvm || kvm_arch_supports_gmem(kvm); + #endif +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/11-kvm-clock/0024-de-gpc-ify-kvm-clock.patch b/resources/hiding_ci/linux_patches/11-kvm-clock/0024-de-gpc-ify-kvm-clock.patch new file mode 100644 index 00000000000..569fede2eb8 --- /dev/null +++ b/resources/hiding_ci/linux_patches/11-kvm-clock/0024-de-gpc-ify-kvm-clock.patch @@ -0,0 +1,147 @@ +From fbc945f79a95e653951563d360762698a0c2f077 Mon Sep 17 00:00:00 2001 +From: Patrick Roy +Date: Tue, 3 Jun 2025 13:57:15 +0100 +Subject: [PATCH 24/42] de-gpc-ify kvm-clock + +Signed-off-by: Patrick Roy +--- + arch/x86/include/asm/kvm_host.h | 2 +- + arch/x86/kvm/x86.c | 47 ++++++++++----------------------- + 2 files changed, 15 insertions(+), 34 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h 
+index 7b9ccdd99f32..d4130899bd08 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -924,7 +924,7 @@ struct kvm_vcpu_arch { + s8 pvclock_tsc_shift; + u32 pvclock_tsc_mul; + unsigned int hw_tsc_khz; +- struct gfn_to_pfn_cache pv_time; ++ gpa_t system_time; + /* set guest stopped flag in pvclock flags field */ + bool pvclock_set_guest_stopped_request; + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index e21f5f2fe059..03f1a5d6b2b0 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -2349,12 +2349,9 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time, + + /* we verify if the enable bit is set... */ + if (system_time & 1) +- kvm_gpc_activate(&vcpu->arch.pv_time, system_time & ~1ULL, +- sizeof(struct pvclock_vcpu_time_info)); ++ vcpu->arch.system_time = system_time & ~1ULL; + else +- kvm_gpc_deactivate(&vcpu->arch.pv_time); +- +- return; ++ vcpu->arch.system_time = INVALID_GPA; + } + + static uint32_t div_frac(uint32_t dividend, uint32_t divisor) +@@ -3148,26 +3145,14 @@ u64 get_kvmclock_ns(struct kvm *kvm) + + static void kvm_setup_guest_pvclock(struct pvclock_vcpu_time_info *ref_hv_clock, + struct kvm_vcpu *vcpu, +- struct gfn_to_pfn_cache *gpc, +- unsigned int offset) ++ gpa_t gpa) + { +- struct pvclock_vcpu_time_info *guest_hv_clock; ++ struct pvclock_vcpu_time_info guest_hv_clock; + struct pvclock_vcpu_time_info hv_clock; +- unsigned long flags; + + memcpy(&hv_clock, ref_hv_clock, sizeof(hv_clock)); + +- read_lock_irqsave(&gpc->lock, flags); +- while (!kvm_gpc_check(gpc, offset + sizeof(*guest_hv_clock))) { +- read_unlock_irqrestore(&gpc->lock, flags); +- +- if (kvm_gpc_refresh(gpc, offset + sizeof(*guest_hv_clock))) +- return; +- +- read_lock_irqsave(&gpc->lock, flags); +- } +- +- guest_hv_clock = (void *)(gpc->khva + offset); ++ kvm_read_guest(vcpu->kvm, gpa, &guest_hv_clock, sizeof(struct pvclock_vcpu_time_info)); + + /* + * This VCPU is paused, but it's legal for a guest to 
read another +@@ -3176,20 +3161,18 @@ static void kvm_setup_guest_pvclock(struct pvclock_vcpu_time_info *ref_hv_clock, + * it is consistent. + */ + +- guest_hv_clock->version = hv_clock.version = (guest_hv_clock->version + 1) | 1; ++ guest_hv_clock.version = hv_clock.version = (guest_hv_clock.version + 1) | 1; + smp_wmb(); + + /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ +- hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED); ++ hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); + +- memcpy(guest_hv_clock, &hv_clock, sizeof(*guest_hv_clock)); ++ kvm_write_guest(vcpu->kvm, gpa, &hv_clock, sizeof(struct pvclock_vcpu_time_info)); + + smp_wmb(); + +- guest_hv_clock->version = ++hv_clock.version; +- +- kvm_gpc_mark_dirty_in_slot(gpc); +- read_unlock_irqrestore(&gpc->lock, flags); ++ ++hv_clock.version; ++ kvm_write_guest(vcpu->kvm, gpa + offsetof(struct pvclock_vcpu_time_info, version), &hv_clock.version, sizeof(hv_clock.version)); + + trace_kvm_pvclock_update(vcpu->vcpu_id, &hv_clock); + } +@@ -3280,7 +3263,7 @@ int kvm_guest_time_update(struct kvm_vcpu *v) + if (use_master_clock) + hv_clock.flags |= PVCLOCK_TSC_STABLE_BIT; + +- if (vcpu->pv_time.active) { ++ if (vcpu->system_time != INVALID_GPA) { + /* + * GUEST_STOPPED is only supported by kvmclock, and KVM's + * historic behavior is to only process the request if kvmclock +@@ -3290,7 +3273,7 @@ int kvm_guest_time_update(struct kvm_vcpu *v) + hv_clock.flags |= PVCLOCK_GUEST_STOPPED; + vcpu->pvclock_set_guest_stopped_request = false; + } +- kvm_setup_guest_pvclock(&hv_clock, v, &vcpu->pv_time, 0); ++ kvm_setup_guest_pvclock(&hv_clock, v, vcpu->system_time); + + hv_clock.flags &= ~PVCLOCK_GUEST_STOPPED; + } +@@ -3606,7 +3589,7 @@ static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data) + + static void kvmclock_reset(struct kvm_vcpu *vcpu) + { +- kvm_gpc_deactivate(&vcpu->arch.pv_time); ++ vcpu->arch.system_time = INVALID_GPA; + vcpu->arch.time = 0; + } + +@@ -5727,7 
+5710,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, + */ + static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) + { +- if (!vcpu->arch.pv_time.active) ++ if (vcpu->arch.system_time == INVALID_GPA) + return -EINVAL; + vcpu->arch.pvclock_set_guest_stopped_request = true; + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); +@@ -12334,8 +12317,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) + vcpu->arch.regs_avail = ~0; + vcpu->arch.regs_dirty = ~0; + +- kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm); +- + if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) + kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); + else +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0013-KVM-Add-KVM_MEM_USERFAULT-memslot-flag-and-bitmap.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0001-KVM-Add-KVM_MEM_USERFAULT-memslot-flag-and-bitmap.patch similarity index 72% rename from resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0013-KVM-Add-KVM_MEM_USERFAULT-memslot-flag-and-bitmap.patch rename to resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0001-KVM-Add-KVM_MEM_USERFAULT-memslot-flag-and-bitmap.patch index f4a62443b72..a4b9c3c88fa 100644 --- a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0013-KVM-Add-KVM_MEM_USERFAULT-memslot-flag-and-bitmap.patch +++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0001-KVM-Add-KVM_MEM_USERFAULT-memslot-flag-and-bitmap.patch @@ -1,7 +1,7 @@ -From 48a178e27031d5eac97ba0630686fcf3034e88ed Mon Sep 17 00:00:00 2001 +From 7da9dbce26d357193434938cf5e6095c01cdd727 Mon Sep 17 00:00:00 2001 From: James Houghton Date: Thu, 9 Jan 2025 20:49:17 +0000 -Subject: [PATCH 13/26] KVM: Add KVM_MEM_USERFAULT memslot flag and bitmap +Subject: [PATCH 01/12] KVM: Add KVM_MEM_USERFAULT memslot flag and bitmap Use one of the 14 reserved u64s in struct kvm_userspace_memory_region2 for the user to provide `userfault_bitmap`. 
@@ -16,14 +16,14 @@ Signed-off-by: James Houghton include/linux/kvm_host.h | 14 ++++++++++++++ include/uapi/linux/kvm.h | 4 +++- virt/kvm/Kconfig | 3 +++ - virt/kvm/kvm_main.c | 36 ++++++++++++++++++++++++++++++++++++ - 4 files changed, 56 insertions(+), 1 deletion(-) + virt/kvm/kvm_main.c | 35 +++++++++++++++++++++++++++++++++++ + 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h -index ec3bedc18eab..6cd0d910678e 100644 +index 1ad1d7f4ac6e..0d7e46b00886 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h -@@ -596,6 +596,7 @@ struct kvm_memory_slot { +@@ -599,6 +599,7 @@ struct kvm_memory_slot { unsigned long *dirty_bitmap; struct kvm_arch_memory_slot arch; unsigned long userspace_addr; @@ -31,7 +31,7 @@ index ec3bedc18eab..6cd0d910678e 100644 u32 flags; short id; u16 as_id; -@@ -746,6 +747,11 @@ static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) +@@ -757,6 +758,11 @@ static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) } #endif @@ -43,8 +43,8 @@ index ec3bedc18eab..6cd0d910678e 100644 struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; -@@ -2592,4 +2598,12 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, - void kvm_gmem_handle_folio_put(struct folio *folio); +@@ -2663,4 +2669,12 @@ static inline int kvm_enable_virtualization(void) { return 0; } + static inline void kvm_disable_virtualization(void) { } #endif +int kvm_gfn_userfault(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -57,7 +57,7 @@ index ec3bedc18eab..6cd0d910678e 100644 + #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h -index fb02a93546d8..03676746be71 100644 +index cfb99bfd496d..24d27bcc5c51 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -40,7 +40,8 @@ struct kvm_userspace_memory_region2 { @@ -74,27 +74,26 @@ index fb02a93546d8..03676746be71 100644 #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL 
<< 1) #define KVM_MEM_GUEST_MEMFD (1UL << 2) -+#define KVM_MEM_USERFAULT (1UL << 3) ++#define KVM_MEM_USERFAULT (1UL << 3) /* for KVM_IRQ_LINE */ struct kvm_irq_level { diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig -index 4e759e8020c5..7987fed3f3ec 100644 +index e90884f74404..9e45d72f8057 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig -@@ -128,3 +128,6 @@ config HAVE_KVM_ARCH_GMEM_INVALIDATE +@@ -132,3 +132,6 @@ config HAVE_KVM_ARCH_GMEM_INVALIDATE config KVM_GMEM_SHARED_MEM - select KVM_PRIVATE_MEM + select KVM_GMEM bool + +config HAVE_KVM_USERFAULT + bool -\ No newline at end of file diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index 32ca1c921ab0..fb3ccf0cbb04 100644 +index d99d820a5a29..c00094348f80 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c -@@ -1532,6 +1532,9 @@ static int check_memory_region_flags(struct kvm *kvm, +@@ -1605,6 +1605,9 @@ static int check_memory_region_flags(struct kvm *kvm, !(mem->flags & KVM_MEM_GUEST_MEMFD)) valid_flags |= KVM_MEM_READONLY; @@ -104,37 +103,36 @@ index 32ca1c921ab0..fb3ccf0cbb04 100644 if (mem->flags & ~valid_flags) return -EINVAL; -@@ -1968,6 +1971,13 @@ static int kvm_set_memory_region(struct kvm *kvm, +@@ -2040,6 +2043,12 @@ static int kvm_set_memory_region(struct kvm *kvm, + if (id < KVM_USER_MEM_SLOTS && (mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) return -EINVAL; - + if (mem->flags & KVM_MEM_USERFAULT && + ((mem->userfault_bitmap != untagged_addr(mem->userfault_bitmap)) || -+ !access_ok((void __user *)(unsigned long)mem->userfault_bitmap, -+ DIV_ROUND_UP(mem->memory_size >> PAGE_SHIFT, BITS_PER_LONG) -+ * sizeof(long)))) ++ !access_ok((void __user *)(unsigned long)mem->userfault_bitmap, ++ DIV_ROUND_UP(mem->memory_size >> PAGE_SHIFT, BITS_PER_LONG) ++ * sizeof(long)))) + return -EINVAL; -+ + slots = __kvm_memslots(kvm, as_id); - /* -@@ -2035,6 +2045,9 @@ static int kvm_set_memory_region(struct kvm *kvm, +@@ -2108,6 +2117,9 @@ static int kvm_set_memory_region(struct kvm 
*kvm, if (r) goto out; } + if (mem->flags & KVM_MEM_USERFAULT) + new->userfault_bitmap = -+ (unsigned long __user *)(unsigned long)mem->userfault_bitmap; ++ (unsigned long __user *)(unsigned long)mem->userfault_bitmap; r = kvm_set_memslot(kvm, old, new, change); if (r) -@@ -6468,3 +6481,26 @@ void kvm_exit(void) +@@ -6545,3 +6557,26 @@ void kvm_exit(void) kvm_irqfd_exit(); } EXPORT_SYMBOL_GPL(kvm_exit); + +int kvm_gfn_userfault(struct kvm *kvm, struct kvm_memory_slot *memslot, -+ gfn_t gfn) ++ gfn_t gfn) +{ + unsigned long bitmap_chunk = 0; + off_t offset; @@ -155,7 +153,6 @@ index 32ca1c921ab0..fb3ccf0cbb04 100644 + /* Set in the bitmap means that the gfn is userfault */ + return !!(bitmap_chunk & (1ul << (offset % BITS_PER_LONG))); +} -\ No newline at end of file -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0014-KVM-Add-KVM_MEMORY_EXIT_FLAG_USERFAULT.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0002-KVM-Add-KVM_MEMORY_EXIT_FLAG_USERFAULT.patch similarity index 76% rename from resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0014-KVM-Add-KVM_MEMORY_EXIT_FLAG_USERFAULT.patch rename to resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0002-KVM-Add-KVM_MEMORY_EXIT_FLAG_USERFAULT.patch index dddc2b9dbfd..9367b6c9350 100644 --- a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0014-KVM-Add-KVM_MEMORY_EXIT_FLAG_USERFAULT.patch +++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0002-KVM-Add-KVM_MEMORY_EXIT_FLAG_USERFAULT.patch @@ -1,7 +1,7 @@ -From 51a78015a0114ceaf1930739bba6111b1bc09f87 Mon Sep 17 00:00:00 2001 +From 783b11ddc87a63b9299b67b679c37bc5d58f9922 Mon Sep 17 00:00:00 2001 From: James Houghton Date: Thu, 9 Jan 2025 20:49:18 +0000 -Subject: [PATCH 14/26] KVM: Add KVM_MEMORY_EXIT_FLAG_USERFAULT +Subject: [PATCH 02/12] KVM: Add KVM_MEMORY_EXIT_FLAG_USERFAULT This flag is used for vCPU memory faults caused by KVM Userfault; i.e., the bit in `userfault_bitmap` corresponding to 
the faulting gfn was set. @@ -12,10 +12,10 @@ Signed-off-by: James Houghton 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h -index 03676746be71..0e1a2fac5735 100644 +index 24d27bcc5c51..4ce2282f7a18 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h -@@ -444,6 +444,7 @@ struct kvm_run { +@@ -446,6 +446,7 @@ struct kvm_run { /* KVM_EXIT_MEMORY_FAULT */ struct { #define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3) @@ -24,5 +24,5 @@ index 03676746be71..0e1a2fac5735 100644 __u64 gpa; __u64 size; -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0015-KVM-Allow-late-setting-of-KVM_MEM_USERFAULT-on-guest.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0003-KVM-Allow-late-setting-of-KVM_MEM_USERFAULT-on-guest.patch similarity index 82% rename from resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0015-KVM-Allow-late-setting-of-KVM_MEM_USERFAULT-on-guest.patch rename to resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0003-KVM-Allow-late-setting-of-KVM_MEM_USERFAULT-on-guest.patch index 7960341db8a..574e1664c8c 100644 --- a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0015-KVM-Allow-late-setting-of-KVM_MEM_USERFAULT-on-guest.patch +++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0003-KVM-Allow-late-setting-of-KVM_MEM_USERFAULT-on-guest.patch @@ -1,7 +1,7 @@ -From ed691412fd9414d3b9124e2416f6cae3f21a1071 Mon Sep 17 00:00:00 2001 +From 852a599e29590c3339191006bb15b6396dbda0cb Mon Sep 17 00:00:00 2001 From: James Houghton Date: Thu, 9 Jan 2025 20:49:19 +0000 -Subject: [PATCH 15/26] KVM: Allow late setting of KVM_MEM_USERFAULT on +Subject: [PATCH 03/12] KVM: Allow late setting of KVM_MEM_USERFAULT on guest_memfd memslot Currently guest_memfd memslots can only be deleted. 
Slightly change the @@ -14,10 +14,10 @@ Signed-off-by: James Houghton 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index fb3ccf0cbb04..c60fe692de03 100644 +index c00094348f80..2208033d5dd4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c -@@ -2009,9 +2009,6 @@ static int kvm_set_memory_region(struct kvm *kvm, +@@ -2081,9 +2081,6 @@ static int kvm_set_memory_region(struct kvm *kvm, if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages) return -EINVAL; } else { /* Modify an existing slot. */ @@ -27,7 +27,7 @@ index fb3ccf0cbb04..c60fe692de03 100644 if ((mem->userspace_addr != old->userspace_addr) || (npages != old->npages) || ((mem->flags ^ old->flags) & KVM_MEM_READONLY)) -@@ -2025,6 +2022,16 @@ static int kvm_set_memory_region(struct kvm *kvm, +@@ -2097,6 +2094,16 @@ static int kvm_set_memory_region(struct kvm *kvm, return 0; } @@ -44,7 +44,7 @@ index fb3ccf0cbb04..c60fe692de03 100644 if ((change == KVM_MR_CREATE || change == KVM_MR_MOVE) && kvm_check_memslot_overlap(slots, id, base_gfn, base_gfn + npages)) return -EEXIST; -@@ -2040,7 +2047,7 @@ static int kvm_set_memory_region(struct kvm *kvm, +@@ -2112,7 +2119,7 @@ static int kvm_set_memory_region(struct kvm *kvm, new->npages = npages; new->flags = mem->flags; new->userspace_addr = mem->userspace_addr; @@ -54,5 +54,5 @@ index fb3ccf0cbb04..c60fe692de03 100644 if (r) goto out; -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0016-KVM-x86-mmu-Add-support-for-KVM_MEM_USERFAULT.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0004-KVM-x86-mmu-Add-support-for-KVM_MEM_USERFAULT.patch similarity index 74% rename from resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0016-KVM-x86-mmu-Add-support-for-KVM_MEM_USERFAULT.patch rename to resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0004-KVM-x86-mmu-Add-support-for-KVM_MEM_USERFAULT.patch index ca31ca9518b..0c785643479 100644 --- 
a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0016-KVM-x86-mmu-Add-support-for-KVM_MEM_USERFAULT.patch +++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0004-KVM-x86-mmu-Add-support-for-KVM_MEM_USERFAULT.patch @@ -1,7 +1,7 @@ -From fa324f2e503cd36dc357c3eb9b807e02f9b6206e Mon Sep 17 00:00:00 2001 +From c2287b66f61a55e2f2eaeabb1fbe52a020f5dde1 Mon Sep 17 00:00:00 2001 From: James Houghton Date: Thu, 9 Jan 2025 20:49:21 +0000 -Subject: [PATCH 16/26] KVM: x86/mmu: Add support for KVM_MEM_USERFAULT +Subject: [PATCH 04/12] KVM: x86/mmu: Add support for KVM_MEM_USERFAULT Adhering to the requirements of KVM Userfault: @@ -19,19 +19,33 @@ WARN_ON() that was there. Signed-off-by: James Houghton --- + arch/arm64/kvm/mmu.c | 2 +- arch/x86/kvm/Kconfig | 1 + - arch/x86/kvm/mmu/mmu.c | 28 +++++++++++++++++++++---- + arch/x86/kvm/mmu/mmu.c | 15 ++++++++++++++ arch/x86/kvm/mmu/mmu_internal.h | 20 +++++++++++++++--- arch/x86/kvm/x86.c | 36 ++++++++++++++++++++++++--------- include/linux/kvm_host.h | 5 ++++- - 5 files changed, 72 insertions(+), 18 deletions(-) + 6 files changed, 64 insertions(+), 15 deletions(-) +diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c +index 55ac03f277e0..c673bbafc1e3 100644 +--- a/arch/arm64/kvm/mmu.c ++++ b/arch/arm64/kvm/mmu.c +@@ -1554,7 +1554,7 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + ret = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, &page, NULL); + if (ret) { + kvm_prepare_memory_fault_exit(vcpu, fault_ipa, PAGE_SIZE, +- write_fault, exec_fault, false); ++ write_fault, exec_fault, false, false); + return ret; + } + diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig -index 22d1bcdaad58..6b1ef6402e30 100644 +index 29845a286430..1139635d4e03 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig -@@ -48,6 +48,7 @@ config KVM_X86 - select KVM_PRIVATE_MEM if KVM_SW_PROTECTED_VM +@@ -49,6 +49,7 @@ config KVM_X86 + select KVM_GENERIC_GMEM_POPULATE if KVM_SW_PROTECTED_VM select 
KVM_GMEM_SHARED_MEM if KVM_SW_PROTECTED_VM select KVM_WERROR if WERROR + select HAVE_KVM_USERFAULT @@ -39,46 +53,20 @@ index 22d1bcdaad58..6b1ef6402e30 100644 config KVM tristate "Kernel-based Virtual Machine (KVM) support" diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c -index 8160870398b9..7ac7dc164522 100644 +index b071b9afb8ad..9e3d8e05dd30 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c -@@ -4292,14 +4292,20 @@ static inline u8 kvm_max_level_for_order(int order) - return PG_LEVEL_4K; - } - --static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, -- u8 max_level, int gmem_order) -+static u8 kvm_max_private_mapping_level(struct kvm *kvm, -+ struct kvm_memory_slot *slot, -+ kvm_pfn_t pfn, -+ u8 max_level, -+ int gmem_order) - { - u8 req_max_level; - +@@ -4504,6 +4504,9 @@ static u8 kvm_max_level_for_fault_and_order(struct kvm *kvm, if (max_level == PG_LEVEL_4K) return PG_LEVEL_4K; -+ if (kvm_memslot_userfault(slot)) ++ if (kvm_memslot_userfault(fault->slot)) + return PG_LEVEL_4K; + - max_level = min(kvm_max_level_for_order(gmem_order), max_level); + max_level = min(kvm_max_level_for_order(order), max_level); if (max_level == PG_LEVEL_4K) return PG_LEVEL_4K; -@@ -4336,8 +4342,10 @@ static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu, - } - - fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY); -- fault->max_level = kvm_max_private_mapping_level(vcpu->kvm, fault->pfn, -- fault->max_level, max_order); -+ fault->max_level = kvm_max_private_mapping_level(vcpu->kvm, fault->slot, -+ fault->pfn, -+ fault->max_level, -+ max_order); - - return RET_PF_CONTINUE; - } -@@ -4346,6 +4354,18 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, +@@ -4552,6 +4555,18 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { unsigned int foll = fault->write ? 
FOLL_WRITE : 0; @@ -95,13 +83,13 @@ index 8160870398b9..7ac7dc164522 100644 + if (kvm_memslot_userfault(fault->slot)) + fault->max_level = PG_LEVEL_4K; - if (fault->is_private) - return kvm_mmu_faultin_pfn_private(vcpu, fault); + if (fault_from_gmem(fault)) + return kvm_mmu_faultin_pfn_gmem(vcpu, fault); diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h -index 75f00598289d..d1f18dcc18fb 100644 +index db8f33e4de62..84e4bb34abed 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h -@@ -335,12 +335,26 @@ enum { +@@ -336,12 +336,26 @@ enum { */ static_assert(RET_PF_CONTINUE == 0); @@ -132,10 +120,10 @@ index 75f00598289d..d1f18dcc18fb 100644 static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 4b64ab350bcd..04034ca04703 100644 +index 03f1a5d6b2b0..e0c62bb9b105 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -13075,12 +13075,36 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, +@@ -13133,12 +13133,36 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, u32 new_flags = new ? 
new->flags : 0; bool log_dirty_pages = new_flags & KVM_MEM_LOG_DIRTY_PAGES; @@ -174,7 +162,7 @@ index 4b64ab350bcd..04034ca04703 100644 /* * Nothing more to do for RO slots (which can't be dirtied and can't be -@@ -13100,14 +13124,6 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, +@@ -13158,14 +13182,6 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, if ((change != KVM_MR_FLAGS_ONLY) || (new_flags & KVM_MEM_READONLY)) return; @@ -190,10 +178,10 @@ index 4b64ab350bcd..04034ca04703 100644 /* * Recover huge page mappings in the slot now that dirty logging diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h -index 6cd0d910678e..4a5379367332 100644 +index 0d7e46b00886..a2341ddbffb9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h -@@ -2499,7 +2499,8 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr) +@@ -2527,7 +2527,8 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr) static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, gpa_t gpa, gpa_t size, bool is_write, bool is_exec, @@ -203,7 +191,7 @@ index 6cd0d910678e..4a5379367332 100644 { vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT; vcpu->run->memory_fault.gpa = gpa; -@@ -2509,6 +2510,8 @@ static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, +@@ -2537,6 +2538,8 @@ static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, vcpu->run->memory_fault.flags = 0; if (is_private) vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE; @@ -211,7 +199,7 @@ index 6cd0d910678e..4a5379367332 100644 + vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_USERFAULT; } - #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES + static inline bool kvm_gmem_memslot_supports_shared(const struct kvm_memory_slot *slot) -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0017-KVM-Advertise-KVM_CAP_USERFAULT-in-KVM_CHECK_EXTENSI.patch 
b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0005-KVM-Advertise-KVM_CAP_USERFAULT-in-KVM_CHECK_EXTENSI.patch similarity index 58% rename from resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0017-KVM-Advertise-KVM_CAP_USERFAULT-in-KVM_CHECK_EXTENSI.patch rename to resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0005-KVM-Advertise-KVM_CAP_USERFAULT-in-KVM_CHECK_EXTENSI.patch index c89c7c9b262..a7e5e2d567f 100644 --- a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0017-KVM-Advertise-KVM_CAP_USERFAULT-in-KVM_CHECK_EXTENSI.patch +++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0005-KVM-Advertise-KVM_CAP_USERFAULT-in-KVM_CHECK_EXTENSI.patch @@ -1,7 +1,7 @@ -From f0ef961eba32b98755d2bfa5ff684944e3a442fc Mon Sep 17 00:00:00 2001 +From 24ebc5c5026700260ff0a69cad25f0c1454dbc10 Mon Sep 17 00:00:00 2001 From: James Houghton Date: Thu, 9 Jan 2025 20:49:20 +0000 -Subject: [PATCH 17/26] KVM: Advertise KVM_CAP_USERFAULT in KVM_CHECK_EXTENSION +Subject: [PATCH 05/12] KVM: Advertise KVM_CAP_USERFAULT in KVM_CHECK_EXTENSION Advertise support for KVM_CAP_USERFAULT when kvm_has_userfault() returns true. 
Currently this is merely IS_ENABLED(CONFIG_HAVE_KVM_USERFAULT), so @@ -14,32 +14,32 @@ Signed-off-by: James Houghton 2 files changed, 5 insertions(+) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h -index 0e1a2fac5735..f5ad5d39c24b 100644 +index 4ce2282f7a18..1c023deb54d7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h -@@ -934,6 +934,7 @@ struct kvm_enable_cap { - #define KVM_CAP_X86_GUEST_MODE 238 - #define KVM_CAP_GMEM_SHARED_MEM 239 - #define KVM_CAP_GMEM_NO_DIRECT_MAP 240 -+#define KVM_CAP_USERFAULT 241 +@@ -961,6 +961,7 @@ struct kvm_enable_cap { + #define KVM_CAP_RISCV_MP_STATE_RESET 242 + #define KVM_CAP_GMEM_SHARED_MEM 243 + #define KVM_CAP_GMEM_NO_DIRECT_MAP 244 ++#define KVM_CAP_USERFAULT 245 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index c60fe692de03..bb85ea8d0f85 100644 +index 2208033d5dd4..b224f167aeac 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c -@@ -4854,6 +4854,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) +@@ -4941,6 +4941,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_KVM_GMEM_SHARED_MEM case KVM_CAP_GMEM_SHARED_MEM: - return !kvm || kvm_arch_gmem_supports_shared_mem(kvm); + return !kvm || kvm_arch_supports_gmem_shared_mem(kvm); +#endif -+#ifdef CONFIG_HAVE_KVM_USERFAULT -+ case KVM_CAP_USERFAULT: -+ return kvm_has_userfault(kvm); ++#ifdef CONFIG_KVM_HAVE_USERFAULT ++ case KVM_CAP_USERFAULT: ++ return kvm_has_userfault(kvm); #endif default: break; -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0018-KVM-arm64-Add-support-for-KVM_MEM_USERFAULT.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0006-KVM-arm64-Add-support-for-KVM_MEM_USERFAULT.patch similarity index 60% rename from resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0018-KVM-arm64-Add-support-for-KVM_MEM_USERFAULT.patch rename to 
resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0006-KVM-arm64-Add-support-for-KVM_MEM_USERFAULT.patch index 58f076e27cb..10f794efe82 100644 --- a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0018-KVM-arm64-Add-support-for-KVM_MEM_USERFAULT.patch +++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0006-KVM-arm64-Add-support-for-KVM_MEM_USERFAULT.patch @@ -1,7 +1,7 @@ -From 482a64008a53577da046428922f247dce203113f Mon Sep 17 00:00:00 2001 +From 9dfa044aa41bcf6f11934379fbc20cdcfe568b3b Mon Sep 17 00:00:00 2001 From: James Houghton Date: Thu, 9 Jan 2025 20:49:22 +0000 -Subject: [PATCH 18/26] KVM: arm64: Add support for KVM_MEM_USERFAULT +Subject: [PATCH 06/12] KVM: arm64: Add support for KVM_MEM_USERFAULT Adhering to the requirements of KVM Userfault: 1. When it is toggled on, zap the second stage with @@ -15,14 +15,14 @@ consistent with the behavior when dirty logging is disabled. Signed-off-by: James Houghton --- arch/arm64/kvm/Kconfig | 1 + - arch/arm64/kvm/mmu.c | 27 ++++++++++++++++++++++++++- - 2 files changed, 27 insertions(+), 1 deletion(-) + arch/arm64/kvm/mmu.c | 33 ++++++++++++++++++++++++++++++++- + 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig -index 4830d8805bed..aa0f438fba1c 100644 +index 87120d46919a..2b74fb15fcde 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig -@@ -39,6 +39,7 @@ menuconfig KVM +@@ -38,6 +38,7 @@ menuconfig KVM select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS select KVM_GMEM_SHARED_MEM @@ -31,21 +31,34 @@ index 4830d8805bed..aa0f438fba1c 100644 Support hosting virtualized guest machines. 
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c -index adb0681fc1c6..39d9a02db9e9 100644 +index c673bbafc1e3..1213bb84197d 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c -@@ -1497,7 +1497,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - kvm_pfn_t pfn; - bool logging_active = memslot_is_logging(memslot); - bool is_gmem = kvm_mem_is_private(kvm, gfn); -- bool force_pte = logging_active || is_gmem || is_protected_kvm_enabled(); -+ bool force_pte = logging_active || is_gmem || is_protected_kvm_enabled() || -+ kvm_memslot_userfault(memslot); - long vma_pagesize, fault_granule = PAGE_SIZE; - enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; - struct kvm_pgtable *pgt; -@@ -1635,6 +1636,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - mmu_seq = vcpu->kvm->mmu_invalidate_seq; +@@ -1551,6 +1551,13 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + return -EFAULT; + } + ++ if (kvm_gfn_userfault(kvm, memslot, gfn)) { ++ kvm_prepare_memory_fault_exit(vcpu, gfn << PAGE_SHIFT, ++ PAGE_SIZE, write_fault, ++ exec_fault, false, true); ++ return -EFAULT; ++ } ++ + ret = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, &page, NULL); + if (ret) { + kvm_prepare_memory_fault_exit(vcpu, fault_ipa, PAGE_SIZE, +@@ -1644,7 +1651,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + return -EFAULT; + } + +- if (force_pte) ++ if (force_pte || kvm_memslot_userfault(memslot)) + vma_shift = PAGE_SHIFT; + else + vma_shift = get_vma_page_shift(vma, hva); +@@ -1731,6 +1738,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + mmu_seq = kvm->mmu_invalidate_seq; mmap_read_unlock(current->mm); + if (kvm_gfn_userfault(kvm, memslot, gfn)) { @@ -55,10 +68,10 @@ index adb0681fc1c6..39d9a02db9e9 100644 + return -EFAULT; + } + - pfn = faultin_pfn(kvm, memslot, gfn, write_fault, &writable, &page, is_gmem); + pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? 
FOLL_WRITE : 0, + &writable, &page); if (pfn == KVM_PFN_ERR_HWPOISON) { - kvm_send_hwpoison_signal(hva, vma_shift); -@@ -2125,6 +2133,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, +@@ -2217,6 +2231,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change) { bool log_dirty_pages = new && new->flags & KVM_MEM_LOG_DIRTY_PAGES; @@ -71,7 +84,7 @@ index adb0681fc1c6..39d9a02db9e9 100644 + */ + if ((changed_flags & KVM_MEM_USERFAULT) && + (new_flags & KVM_MEM_USERFAULT) && -+ change == KVM_MR_FLAGS_ONLY) ++ change == KVM_MR_FLAGS_ONLY) + kvm_arch_flush_shadow_memslot(kvm, old); + + /* @@ -83,5 +96,5 @@ index adb0681fc1c6..39d9a02db9e9 100644 /* * At this point memslot has been committed and there is an -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0007-KVM-selftests-Fix-vm_mem_region_set_flags-docstring.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0007-KVM-selftests-Fix-vm_mem_region_set_flags-docstring.patch new file mode 100644 index 00000000000..5e1835a9f67 --- /dev/null +++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0007-KVM-selftests-Fix-vm_mem_region_set_flags-docstring.patch @@ -0,0 +1,28 @@ +From 4b29cd637ae9d9b76c22ad9d9859723a27f32ee5 Mon Sep 17 00:00:00 2001 +From: James Houghton +Date: Thu, 9 Jan 2025 20:49:23 +0000 +Subject: [PATCH 07/12] KVM: selftests: Fix vm_mem_region_set_flags docstring + +`flags` is what region->region.flags gets set to. 
+ +Signed-off-by: James Houghton +--- + tools/testing/selftests/kvm/lib/kvm_util.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c +index a055343a7bf7..ca1aa1699f8a 100644 +--- a/tools/testing/selftests/kvm/lib/kvm_util.c ++++ b/tools/testing/selftests/kvm/lib/kvm_util.c +@@ -1200,7 +1200,7 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot) + * + * Input Args: + * vm - Virtual Machine +- * flags - Starting guest physical address ++ * flags - Flags for the memslot + * + * Output Args: None + * +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0008-KVM-selftests-Fix-prefault_mem-logic.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0008-KVM-selftests-Fix-prefault_mem-logic.patch new file mode 100644 index 00000000000..8223e278efd --- /dev/null +++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0008-KVM-selftests-Fix-prefault_mem-logic.patch @@ -0,0 +1,37 @@ +From f9ac1ed774dd6f712780b694dfd3cd3fca108ba6 Mon Sep 17 00:00:00 2001 +From: James Houghton +Date: Thu, 9 Jan 2025 20:49:24 +0000 +Subject: [PATCH 08/12] KVM: selftests: Fix prefault_mem logic + +The previous logic didn't handle the case where memory was partitioned +AND we were using a single userfaultfd. It would only prefault the first +vCPU's memory and not the rest. 
+ +Signed-off-by: James Houghton +--- + tools/testing/selftests/kvm/demand_paging_test.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c +index 0202b78f8680..315f5c9037b4 100644 +--- a/tools/testing/selftests/kvm/demand_paging_test.c ++++ b/tools/testing/selftests/kvm/demand_paging_test.c +@@ -172,11 +172,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) + memset(guest_data_prototype, 0xAB, demand_paging_size); + + if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { +- num_uffds = p->single_uffd ? 1 : nr_vcpus; +- for (i = 0; i < num_uffds; i++) { ++ for (i = 0; i < nr_vcpus; i++) { + vcpu_args = &memstress_args.vcpu_args[i]; + prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa), + vcpu_args->pages * memstress_args.guest_page_size); ++ if (!p->partition_vcpu_memory_access) ++ /* We prefaulted everything */ ++ break; + } + } + +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0009-KVM-selftests-Add-va_start-end-into-uffd_desc.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0009-KVM-selftests-Add-va_start-end-into-uffd_desc.patch new file mode 100644 index 00000000000..6c9e983c94e --- /dev/null +++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0009-KVM-selftests-Add-va_start-end-into-uffd_desc.patch @@ -0,0 +1,44 @@ +From 9ba5bb07f565a05b1f3692db4efbc6c934064cee Mon Sep 17 00:00:00 2001 +From: James Houghton +Date: Thu, 9 Jan 2025 20:49:25 +0000 +Subject: [PATCH 09/12] KVM: selftests: Add va_start/end into uffd_desc + +This will be used for the self-test to look up which userfaultfd we +should be using when handling a KVM Userfault (in the event KVM +Userfault and userfaultfd are being used together). 
+ +Signed-off-by: James Houghton +--- + tools/testing/selftests/kvm/include/userfaultfd_util.h | 2 ++ + tools/testing/selftests/kvm/lib/userfaultfd_util.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h +index 60f7f9d435dc..b62fecdfe745 100644 +--- a/tools/testing/selftests/kvm/include/userfaultfd_util.h ++++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h +@@ -30,6 +30,8 @@ struct uffd_desc { + int *pipefds; + pthread_t *readers; + struct uffd_reader_args *reader_args; ++ void *va_start; ++ void *va_end; + }; + + struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, +diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c +index 5bde176cedd5..31d38b3a9d12 100644 +--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c ++++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c +@@ -152,6 +152,8 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, + expected_ioctls, "missing userfaultfd ioctls"); + + uffd_desc->uffd = uffd; ++ uffd_desc->va_start = hva; ++ uffd_desc->va_end = (char *)hva + len; + for (i = 0; i < uffd_desc->num_readers; ++i) { + int pipes[2]; + +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0010-KVM-selftests-Inform-set_memory_region_test-of-KVM_M.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0010-KVM-selftests-Inform-set_memory_region_test-of-KVM_M.patch new file mode 100644 index 00000000000..2ec812f076a --- /dev/null +++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0010-KVM-selftests-Inform-set_memory_region_test-of-KVM_M.patch @@ -0,0 +1,31 @@ +From 5aa2c3386b0569737b0a0adda1fc2e22cf7e5a84 Mon Sep 17 00:00:00 2001 +From: James Houghton +Date: Thu, 9 Jan 2025 20:49:27 +0000 +Subject: [PATCH 10/12] KVM: selftests: Inform set_memory_region_test of + 
KVM_MEM_USERFAULT + +The KVM_MEM_USERFAULT flag is supported iff KVM_CAP_USERFAULT is +available. + +Signed-off-by: James Houghton +--- + tools/testing/selftests/kvm/set_memory_region_test.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c +index ce3ac0fd6dfb..ba3fe8a53b33 100644 +--- a/tools/testing/selftests/kvm/set_memory_region_test.c ++++ b/tools/testing/selftests/kvm/set_memory_region_test.c +@@ -364,6 +364,9 @@ static void test_invalid_memory_region_flags(void) + if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES) & KVM_MEMORY_ATTRIBUTE_PRIVATE) + supported_flags |= KVM_MEM_GUEST_MEMFD; + ++ if (kvm_check_cap(KVM_CAP_USERFAULT)) ++ supported_flags |= KVM_MEM_USERFAULT; ++ + for (i = 0; i < 32; i++) { + if ((supported_flags & BIT(i)) && !(v2_only_flags & BIT(i))) + continue; +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0011-KVM-selftests-Add-KVM-Userfault-mode-to-demand_pagin.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0011-KVM-selftests-Add-KVM-Userfault-mode-to-demand_pagin.patch new file mode 100644 index 00000000000..dd7f106daa4 --- /dev/null +++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0011-KVM-selftests-Add-KVM-Userfault-mode-to-demand_pagin.patch @@ -0,0 +1,381 @@ +From 67a2c0276b1898f0fba9c3e6eccc07feaaf2d20d Mon Sep 17 00:00:00 2001 +From: James Houghton +Date: Thu, 9 Jan 2025 20:49:26 +0000 +Subject: [PATCH 11/12] KVM: selftests: Add KVM Userfault mode to + demand_paging_test + +Add a way for the KVM_RUN loop to handle -EFAULT exits when they are for +KVM_MEMORY_EXIT_FLAG_USERFAULT. In this case, preemptively handle the +UFFDIO_COPY or UFFDIO_CONTINUE if userfaultfd is also in use. This saves +the trip through the userfaultfd poll/read/WAKE loop. + +When preemptively handling UFFDIO_COPY/CONTINUE, do so with +MODE_DONTWAKE, as there will not be a thread to wake. 
If a thread *does* +take the userfaultfd slow path, we will get a regular userfault, and we +will call handle_uffd_page_request() which will do a full wake-up. In +the EEXIST case, a wake-up will not occur. Make sure to call UFFDIO_WAKE +explicitly in this case. + +When handling KVM userfaults, make sure to set the bitmap with +memory_order_release. Although it wouldn't affect the functionality of +the test (because memstress doesn't actually require any particular +guest memory contents), it is what userspace normally needs to do. + +Add `-k` to set the test to use KVM Userfault. + +Add the vm_mem_region_set_flags_userfault() helper for setting +`userfault_bitmap` and KVM_MEM_USERFAULT at the same time. + +Signed-off-by: James Houghton +--- + .../selftests/kvm/demand_paging_test.c | 139 +++++++++++++++++- + .../testing/selftests/kvm/include/kvm_util.h | 5 + + tools/testing/selftests/kvm/lib/kvm_util.c | 40 ++++- + 3 files changed, 176 insertions(+), 8 deletions(-) + +diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c +index 315f5c9037b4..183c70731093 100644 +--- a/tools/testing/selftests/kvm/demand_paging_test.c ++++ b/tools/testing/selftests/kvm/demand_paging_test.c +@@ -12,7 +12,9 @@ + #include + #include + #include ++#include + #include ++#include + + #include "kvm_util.h" + #include "test_util.h" +@@ -24,11 +26,21 @@ + #ifdef __NR_userfaultfd + + static int nr_vcpus = 1; ++static int num_uffds; + static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + + static size_t demand_paging_size; ++static size_t host_page_size; + static char *guest_data_prototype; + ++static struct { ++ bool enabled; ++ int uffd_mode; /* set if userfaultfd is also in use */ ++ struct uffd_desc **uffd_descs; ++} kvm_userfault_data; ++ ++static void resolve_kvm_userfault(u64 gpa, u64 size); ++ + static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) + { + struct kvm_vcpu *vcpu = vcpu_args->vcpu; +@@ -41,8 
+53,22 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) + clock_gettime(CLOCK_MONOTONIC, &start); + + /* Let the guest access its memory */ ++restart: + ret = _vcpu_run(vcpu); +- TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); ++ if (ret < 0 && errno == EFAULT && kvm_userfault_data.enabled) { ++ /* Check for userfault. */ ++ TEST_ASSERT(run->exit_reason == KVM_EXIT_MEMORY_FAULT, ++ "Got invalid exit reason: %x", run->exit_reason); ++ TEST_ASSERT(run->memory_fault.flags == ++ KVM_MEMORY_EXIT_FLAG_USERFAULT, ++ "Got invalid memory fault exit: %llx", ++ run->memory_fault.flags); ++ resolve_kvm_userfault(run->memory_fault.gpa, ++ run->memory_fault.size); ++ goto restart; ++ } else ++ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); ++ + if (get_ucall(vcpu, NULL) != UCALL_SYNC) { + TEST_ASSERT(false, + "Invalid guest sync status: exit_reason=%s", +@@ -54,11 +80,10 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) + ts_diff.tv_sec, ts_diff.tv_nsec); + } + +-static int handle_uffd_page_request(int uffd_mode, int uffd, +- struct uffd_msg *msg) ++static int resolve_uffd_page_request(int uffd_mode, int uffd, uint64_t addr, ++ bool wake) + { + pid_t tid = syscall(__NR_gettid); +- uint64_t addr = msg->arg.pagefault.address; + struct timespec start; + struct timespec ts_diff; + int r; +@@ -71,7 +96,7 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, + copy.src = (uint64_t)guest_data_prototype; + copy.dst = addr; + copy.len = demand_paging_size; +- copy.mode = 0; ++ copy.mode = wake ? 0 : UFFDIO_COPY_MODE_DONTWAKE; + + r = ioctl(uffd, UFFDIO_COPY, &copy); + /* +@@ -96,6 +121,7 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, + + cont.range.start = addr; + cont.range.len = demand_paging_size; ++ cont.mode = wake ? 
0 : UFFDIO_CONTINUE_MODE_DONTWAKE; + + r = ioctl(uffd, UFFDIO_CONTINUE, &cont); + /* +@@ -119,6 +145,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, + TEST_FAIL("Invalid uffd mode %d", uffd_mode); + } + ++ if (r < 0 && wake) { ++ /* ++ * No wake-up occurs when UFFDIO_COPY/CONTINUE fails, but we ++ * have a thread waiting. Wake it up. ++ */ ++ struct uffdio_range range = {0}; ++ ++ range.start = addr; ++ range.len = demand_paging_size; ++ ++ TEST_ASSERT(ioctl(uffd, UFFDIO_WAKE, &range) == 0, ++ "UFFDIO_WAKE failed: 0x%lx", addr); ++ } ++ + ts_diff = timespec_elapsed(start); + + PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid, +@@ -129,6 +169,58 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, + return 0; + } + ++static int handle_uffd_page_request(int uffd_mode, int uffd, ++ struct uffd_msg *msg) ++{ ++ uint64_t addr = msg->arg.pagefault.address; ++ ++ return resolve_uffd_page_request(uffd_mode, uffd, addr, true); ++} ++ ++static void resolve_kvm_userfault(u64 gpa, u64 size) ++{ ++ struct kvm_vm *vm = memstress_args.vm; ++ struct userspace_mem_region *region; ++ unsigned long *bitmap_chunk; ++ u64 page, gpa_offset; ++ ++ region = (struct userspace_mem_region *) userspace_mem_region_find( ++ vm, gpa, (gpa + size - 1)); ++ ++ if (kvm_userfault_data.uffd_mode) { ++ /* ++ * Resolve userfaults early, without needing to read them ++ * off the userfaultfd. 
++ */ ++ uint64_t hva = (uint64_t)addr_gpa2hva(vm, gpa); ++ struct uffd_desc **descs = kvm_userfault_data.uffd_descs; ++ int i, fd; ++ ++ for (i = 0; i < num_uffds; ++i) ++ if (hva >= (uint64_t)descs[i]->va_start && ++ hva < (uint64_t)descs[i]->va_end) ++ break; ++ ++ TEST_ASSERT(i < num_uffds, ++ "Did not find userfaultfd for hva: %lx", hva); ++ ++ fd = kvm_userfault_data.uffd_descs[i]->uffd; ++ resolve_uffd_page_request(kvm_userfault_data.uffd_mode, fd, ++ hva, false); ++ } else { ++ uint64_t hva = (uint64_t)addr_gpa2hva(vm, gpa); ++ ++ memcpy((char *)hva, guest_data_prototype, demand_paging_size); ++ } ++ ++ gpa_offset = gpa - region->region.guest_phys_addr; ++ page = gpa_offset / host_page_size; ++ bitmap_chunk = (unsigned long *)region->region.userfault_bitmap + ++ page / BITS_PER_LONG; ++ atomic_fetch_and_explicit((_Atomic unsigned long *)bitmap_chunk, ++ ~(1ul << (page % BITS_PER_LONG)), memory_order_release); ++} ++ + struct test_params { + int uffd_mode; + bool single_uffd; +@@ -136,6 +228,7 @@ struct test_params { + int readers_per_uffd; + enum vm_mem_backing_src_type src_type; + bool partition_vcpu_memory_access; ++ bool kvm_userfault; + }; + + static void prefault_mem(void *alias, uint64_t len) +@@ -149,6 +242,25 @@ static void prefault_mem(void *alias, uint64_t len) + } + } + ++static void enable_userfault(struct kvm_vm *vm, int slots) ++{ ++ for (int i = 0; i < slots; ++i) { ++ int slot = MEMSTRESS_MEM_SLOT_INDEX + i; ++ struct userspace_mem_region *region; ++ unsigned long *userfault_bitmap; ++ int flags = KVM_MEM_USERFAULT; ++ ++ region = memslot2region(vm, slot); ++ userfault_bitmap = bitmap_zalloc(region->mmap_size / ++ host_page_size); ++ /* everything is userfault initially */ ++ memset(userfault_bitmap, -1, region->mmap_size / host_page_size / CHAR_BIT); ++ printf("Setting bitmap: %p\n", userfault_bitmap); ++ vm_mem_region_set_flags_userfault(vm, slot, flags, ++ userfault_bitmap); ++ } ++} ++ + static void run_test(enum vm_guest_mode mode, void 
*arg) + { + struct memstress_vcpu_args *vcpu_args; +@@ -159,12 +271,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) + struct timespec ts_diff; + double vcpu_paging_rate; + struct kvm_vm *vm; +- int i, num_uffds = 0; ++ int i; + + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, + p->src_type, p->partition_vcpu_memory_access); + + demand_paging_size = get_backing_src_pagesz(p->src_type); ++ host_page_size = getpagesize(); + + guest_data_prototype = malloc(demand_paging_size); + TEST_ASSERT(guest_data_prototype, +@@ -208,6 +321,14 @@ static void run_test(enum vm_guest_mode mode, void *arg) + } + } + ++ if (p->kvm_userfault) { ++ TEST_REQUIRE(kvm_has_cap(KVM_CAP_USERFAULT)); ++ kvm_userfault_data.enabled = true; ++ kvm_userfault_data.uffd_mode = p->uffd_mode; ++ kvm_userfault_data.uffd_descs = uffd_descs; ++ enable_userfault(vm, 1); ++ } ++ + pr_info("Finished creating vCPUs and starting uffd threads\n"); + + clock_gettime(CLOCK_MONOTONIC, &start); +@@ -265,6 +386,7 @@ static void help(char *name) + printf(" -v: specify the number of vCPUs to run.\n"); + printf(" -o: Overlap guest memory accesses instead of partitioning\n" + " them into a separate region of memory for each vCPU.\n"); ++ printf(" -k: Use KVM Userfault\n"); + puts(""); + exit(0); + } +@@ -283,7 +405,7 @@ int main(int argc, char *argv[]) + + guest_modes_append_default(); + +- while ((opt = getopt(argc, argv, "ahom:u:d:b:s:v:c:r:")) != -1) { ++ while ((opt = getopt(argc, argv, "ahokm:u:d:b:s:v:c:r:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); +@@ -326,6 +448,9 @@ int main(int argc, char *argv[]) + "Invalid number of readers per uffd %d: must be >=1", + p.readers_per_uffd); + break; ++ case 'k': ++ p.kvm_userfault = true; ++ break; + case 'h': + default: + help(argv[0]); +diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h +index bee65ca08721..5642d075900f 100644 +--- 
a/tools/testing/selftests/kvm/include/kvm_util.h ++++ b/tools/testing/selftests/kvm/include/kvm_util.h +@@ -630,6 +630,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, + void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); ++struct userspace_mem_region * ++userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); + + #ifndef vm_arch_has_protected_memory + static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) +@@ -639,6 +641,9 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) + #endif + + void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); ++void vm_mem_region_set_flags_userfault(struct kvm_vm *vm, uint32_t slot, ++ uint32_t flags, ++ unsigned long *userfault_bitmap); + void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); + void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); + struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c +index ca1aa1699f8a..3c215df1d2d8 100644 +--- a/tools/testing/selftests/kvm/lib/kvm_util.c ++++ b/tools/testing/selftests/kvm/lib/kvm_util.c +@@ -694,7 +694,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], + * of the regions is returned. Null is returned only when no overlapping + * region exists. 
+ */ +-static struct userspace_mem_region * ++struct userspace_mem_region * + userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) + { + struct rb_node *node; +@@ -1225,6 +1225,44 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) + ret, errno, slot, flags); + } + ++/* ++ * VM Memory Region Flags Set with a userfault bitmap ++ * ++ * Input Args: ++ * vm - Virtual Machine ++ * flags - Flags for the memslot ++ * userfault_bitmap - The bitmap to use for KVM_MEM_USERFAULT ++ * ++ * Output Args: None ++ * ++ * Return: None ++ * ++ * Sets the flags of the memory region specified by the value of slot, ++ * to the values given by flags. This helper adds a way to provide a ++ * userfault_bitmap. ++ */ ++void vm_mem_region_set_flags_userfault(struct kvm_vm *vm, uint32_t slot, ++ uint32_t flags, ++ unsigned long *userfault_bitmap) ++{ ++ int ret; ++ struct userspace_mem_region *region; ++ ++ region = memslot2region(vm, slot); ++ ++ TEST_ASSERT(!userfault_bitmap ^ (flags & KVM_MEM_USERFAULT), ++ "KVM_MEM_USERFAULT must be specified with a bitmap"); ++ ++ region->region.flags = flags; ++ region->region.userfault_bitmap = (__u64)userfault_bitmap; ++ ++ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region); ++ ++ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" ++ " rc: %i errno: %i slot: %u flags: 0x%x", ++ ret, errno, slot, flags); ++} ++ + /* + * VM Memory Region Move + * +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0012-KVM-selftests-Add-KVM_MEM_USERFAULT-guest_memfd-togg.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0012-KVM-selftests-Add-KVM_MEM_USERFAULT-guest_memfd-togg.patch new file mode 100644 index 00000000000..01e26d8036a --- /dev/null +++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0012-KVM-selftests-Add-KVM_MEM_USERFAULT-guest_memfd-togg.patch @@ -0,0 +1,65 @@ +From eed010571ed963539440319eace2395b47b2b4a6 Mon Sep 17 
00:00:00 2001 +From: James Houghton +Date: Thu, 9 Jan 2025 20:49:28 +0000 +Subject: [PATCH 12/12] KVM: selftests: Add KVM_MEM_USERFAULT + guest_memfd + toggle tests + +Make sure KVM_MEM_USERFAULT can be toggled on and off for +KVM_MEM_GUEST_MEMFD memslots. + +Signed-off-by: James Houghton +--- + .../selftests/kvm/set_memory_region_test.c | 30 +++++++++++++++++++ + 1 file changed, 30 insertions(+) + +diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c +index ba3fe8a53b33..20a03cb57acf 100644 +--- a/tools/testing/selftests/kvm/set_memory_region_test.c ++++ b/tools/testing/selftests/kvm/set_memory_region_test.c +@@ -606,6 +606,35 @@ static void test_mmio_during_vectoring(void) + + kvm_vm_free(vm); + } ++ ++static void test_private_memory_region_userfault(void) ++{ ++ struct kvm_vm *vm; ++ int memfd; ++ ++ pr_info("Testing toggling KVM_MEM_USERFAULT on KVM_MEM_GUEST_MEMFD memory regions\n"); ++ ++ vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); ++ ++ test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail"); ++ test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail"); ++ ++ memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0); ++ ++ vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, ++ MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0); ++ ++ vm_set_user_memory_region2(vm, MEM_REGION_SLOT, ++ KVM_MEM_GUEST_MEMFD | KVM_MEM_USERFAULT, ++ MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0); ++ ++ vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, ++ MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0); ++ ++ close(memfd); ++ ++ kvm_vm_free(vm); ++} + #endif + + int main(int argc, char *argv[]) +@@ -633,6 +662,7 @@ int main(int argc, char *argv[]) + (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))) { + test_add_private_memory_region(); + test_add_overlapping_private_memory_regions(); ++ test_private_memory_region_userfault(); + } else { + 
pr_info("Skipping tests for KVM_MEM_GUEST_MEMFD memory regions\n"); + } +-- +2.49.0 + diff --git a/resources/hiding_ci/linux_patches/20-gmem-write/0019-KVM-guest_memfd-add-generic-population-via-write.patch b/resources/hiding_ci/linux_patches/20-gmem-write/0037-KVM-guest_memfd-add-generic-population-via-write.patch similarity index 84% rename from resources/hiding_ci/linux_patches/20-gmem-write/0019-KVM-guest_memfd-add-generic-population-via-write.patch rename to resources/hiding_ci/linux_patches/20-gmem-write/0037-KVM-guest_memfd-add-generic-population-via-write.patch index 0c05129841e..daad8db5d25 100644 --- a/resources/hiding_ci/linux_patches/20-gmem-write/0019-KVM-guest_memfd-add-generic-population-via-write.patch +++ b/resources/hiding_ci/linux_patches/20-gmem-write/0037-KVM-guest_memfd-add-generic-population-via-write.patch @@ -1,7 +1,7 @@ -From f81fae83d40e1520a0a46afa3473f9fc4c6b7c79 Mon Sep 17 00:00:00 2001 +From 39043385e22a91db6aec81b35ca1c6b6ac48a7b3 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin -Date: Fri, 29 Nov 2024 11:51:02 +0000 -Subject: [PATCH 19/26] KVM: guest_memfd: add generic population via write +Date: Mon, 3 Mar 2025 13:08:37 +0000 +Subject: [PATCH 37/42] KVM: guest_memfd: add generic population via write write syscall populates guest_memfd with user-supplied data in a generic way, ie no vendor-specific preparation is performed. 
This is supposed @@ -18,20 +18,17 @@ The following behaviour is implemented: Signed-off-by: Nikita Kalyazin --- - virt/kvm/guest_memfd.c | 94 ++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 91 insertions(+), 3 deletions(-) + virt/kvm/guest_memfd.c | 92 +++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c -index 291d647a5c80..5abb6d52a375 100644 +index 99e1b20a9977..54ddad35bb5c 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c -@@ -432,12 +432,97 @@ static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) - +@@ -392,8 +392,95 @@ static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) return 0; } --#else --#define kvm_gmem_mmap NULL -+ + +static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ @@ -114,8 +111,7 @@ index 291d647a5c80..5abb6d52a375 100644 + return ret && start == (*offset >> PAGE_SHIFT) ? 
+ ret : *offset - (start << PAGE_SHIFT); +} - #endif /* CONFIG_KVM_GMEM_SHARED_MEM */ - ++ static struct file_operations kvm_gmem_fops = { - .mmap = kvm_gmem_mmap, +#ifdef CONFIG_KVM_GMEM_SHARED_MEM @@ -126,7 +122,7 @@ index 291d647a5c80..5abb6d52a375 100644 .open = generic_file_open, .release = kvm_gmem_release, .fallocate = kvm_gmem_fallocate, -@@ -557,6 +642,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) +@@ -514,6 +601,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) } file->f_flags |= O_LARGEFILE; @@ -137,5 +133,5 @@ index 291d647a5c80..5abb6d52a375 100644 inode = file->f_inode; WARN_ON(file->f_mapping != inode->i_mapping); -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/20-gmem-write/0020-KVM-selftests-update-guest_memfd-write-tests.patch b/resources/hiding_ci/linux_patches/20-gmem-write/0038-KVM-selftests-update-guest_memfd-write-tests.patch similarity index 87% rename from resources/hiding_ci/linux_patches/20-gmem-write/0020-KVM-selftests-update-guest_memfd-write-tests.patch rename to resources/hiding_ci/linux_patches/20-gmem-write/0038-KVM-selftests-update-guest_memfd-write-tests.patch index 869144f63d0..942c8c32678 100644 --- a/resources/hiding_ci/linux_patches/20-gmem-write/0020-KVM-selftests-update-guest_memfd-write-tests.patch +++ b/resources/hiding_ci/linux_patches/20-gmem-write/0038-KVM-selftests-update-guest_memfd-write-tests.patch @@ -1,7 +1,7 @@ -From 3ccb28e0fe31afa8ac626ebd5b957ba9263a68d3 Mon Sep 17 00:00:00 2001 +From f694f4128da12fa8153c952c7746b2290019ac94 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin -Date: Fri, 29 Nov 2024 11:57:58 +0000 -Subject: [PATCH 20/26] KVM: selftests: update guest_memfd write tests +Date: Mon, 3 Mar 2025 13:08:38 +0000 +Subject: [PATCH 38/42] KVM: selftests: update guest_memfd write tests This is to reflect that the write syscall is now implemented for guest_memfd. 
@@ -12,10 +12,10 @@ Signed-off-by: Nikita Kalyazin 1 file changed, 79 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c -index 38c501e49e0e..b07221aa54c9 100644 +index 5da2ed6277ac..bd001e2b3f6e 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c -@@ -20,18 +20,90 @@ +@@ -22,18 +22,90 @@ #include "kvm_util.h" #include "test_util.h" @@ -110,17 +110,17 @@ index 38c501e49e0e..b07221aa54c9 100644 + free(buf); } - static void test_mmap_allowed(int fd, size_t total_size) -@@ -233,7 +305,8 @@ void test_vm_type(unsigned long type, bool is_shared) + static void test_mmap_supported(int fd, size_t page_size, size_t total_size) +@@ -270,7 +342,8 @@ static void test_with_type(unsigned long vm_type, uint64_t guest_memfd_flags, - fd = vm_create_guest_memfd(vm, total_size, 0); + fd = vm_create_guest_memfd(vm, total_size, guest_memfd_flags); - test_file_read_write(fd); + test_file_read(fd); + test_file_write(fd, total_size); - if (is_shared) - test_mmap_allowed(fd, total_size); + if (expect_mmap_allowed) { + test_mmap_supported(fd, page_size, total_size); -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/25-gmem-uffd/0021-mm-userfaultfd-generic-continue-for-non-hugetlbfs.patch b/resources/hiding_ci/linux_patches/25-gmem-uffd/0001-mm-userfaultfd-generic-continue-for-non-hugetlbfs.patch similarity index 86% rename from resources/hiding_ci/linux_patches/25-gmem-uffd/0021-mm-userfaultfd-generic-continue-for-non-hugetlbfs.patch rename to resources/hiding_ci/linux_patches/25-gmem-uffd/0001-mm-userfaultfd-generic-continue-for-non-hugetlbfs.patch index 4818a87a713..618a6631f84 100644 --- a/resources/hiding_ci/linux_patches/25-gmem-uffd/0021-mm-userfaultfd-generic-continue-for-non-hugetlbfs.patch +++ b/resources/hiding_ci/linux_patches/25-gmem-uffd/0001-mm-userfaultfd-generic-continue-for-non-hugetlbfs.patch @@ -1,7 +1,7 @@ 
-From 51dc7d27476d00d96f6f71882a11b5e17e80aa8f Mon Sep 17 00:00:00 2001 +From 8815daa4b4574f5bddc6632e254d61053e642d06 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin Date: Mon, 31 Mar 2025 10:15:35 +0000 -Subject: [PATCH 21/26] mm: userfaultfd: generic continue for non hugetlbfs +Subject: [PATCH 1/6] mm: userfaultfd: generic continue for non hugetlbfs Remove shmem-specific code from UFFDIO_CONTINUE implementation for non-huge pages by calling vm_ops->fault(). A new VMF flag, @@ -18,10 +18,10 @@ Signed-off-by: Nikita Kalyazin 4 files changed, 38 insertions(+), 14 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h -index 0234f14f2aa6..2f26ee9742bf 100644 +index d6b91e8a66d6..d4c35a50058c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h -@@ -1429,6 +1429,9 @@ enum tlb_flush_reason { +@@ -1565,6 +1565,9 @@ enum tlb_flush_reason { * @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached. * We should only access orig_pte if this flag set. * @FAULT_FLAG_VMA_LOCK: The fault is handled under VMA lock. @@ -31,7 +31,7 @@ index 0234f14f2aa6..2f26ee9742bf 100644 * * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify * whether we would allow page faults to retry by specifying these two -@@ -1467,6 +1470,7 @@ enum fault_flag { +@@ -1603,6 +1606,7 @@ enum fault_flag { FAULT_FLAG_UNSHARE = 1 << 10, FAULT_FLAG_ORIG_PTE_VALID = 1 << 11, FAULT_FLAG_VMA_LOCK = 1 << 12, @@ -40,10 +40,10 @@ index 0234f14f2aa6..2f26ee9742bf 100644 typedef unsigned int __bitwise zap_flags_t; diff --git a/mm/hugetlb.c b/mm/hugetlb.c -index 97930d44d460..c004cfdcd4e2 100644 +index 8746ed2fec13..ff9c0ffa128d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c -@@ -6228,7 +6228,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping, +@@ -6553,7 +6553,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping, } /* Check for page in userfault range. 
*/ @@ -53,10 +53,10 @@ index 97930d44d460..c004cfdcd4e2 100644 folio_put(folio); /* See comment in userfaultfd_missing() block above */ diff --git a/mm/shmem.c b/mm/shmem.c -index 1ede0800e846..b4159303fe59 100644 +index 0c5fb4ffa03a..bca13ee02574 100644 --- a/mm/shmem.c +++ b/mm/shmem.c -@@ -2467,7 +2467,8 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, +@@ -2454,7 +2454,8 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, fault_mm = vma ? vma->vm_mm : NULL; folio = filemap_get_entry(inode->i_mapping, index); @@ -66,7 +66,7 @@ index 1ede0800e846..b4159303fe59 100644 if (!xa_is_value(folio)) folio_put(folio); *fault_type = handle_userfault(vmf, VM_UFFD_MINOR); -@@ -2727,6 +2728,8 @@ static vm_fault_t shmem_falloc_wait(struct vm_fault *vmf, struct inode *inode) +@@ -2714,6 +2715,8 @@ static vm_fault_t shmem_falloc_wait(struct vm_fault *vmf, struct inode *inode) static vm_fault_t shmem_fault(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); @@ -75,7 +75,7 @@ index 1ede0800e846..b4159303fe59 100644 gfp_t gfp = mapping_gfp_mask(inode->i_mapping); struct folio *folio = NULL; vm_fault_t ret = 0; -@@ -2743,8 +2746,8 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf) +@@ -2730,8 +2733,8 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf) } WARN_ON_ONCE(vmf->page != NULL); @@ -87,10 +87,10 @@ index 1ede0800e846..b4159303fe59 100644 return vmf_error(err); if (folio) { diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c -index d06453fa8aba..4b3dbc7dac64 100644 +index bc473ad21202..f88bb8de7fff 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c -@@ -380,30 +380,47 @@ static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd, +@@ -376,30 +376,47 @@ static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd, return ret; } @@ -149,5 +149,5 @@ index d06453fa8aba..4b3dbc7dac64 100644 ret = -EIO; goto out_release; -- -2.47.1 +2.49.0 diff --git 
a/resources/hiding_ci/linux_patches/25-gmem-uffd/0022-mm-provide-can_userfault-vma-operation.patch b/resources/hiding_ci/linux_patches/25-gmem-uffd/0002-mm-provide-can_userfault-vma-operation.patch similarity index 77% rename from resources/hiding_ci/linux_patches/25-gmem-uffd/0022-mm-provide-can_userfault-vma-operation.patch rename to resources/hiding_ci/linux_patches/25-gmem-uffd/0002-mm-provide-can_userfault-vma-operation.patch index b6bc10178cc..d8ee451933c 100644 --- a/resources/hiding_ci/linux_patches/25-gmem-uffd/0022-mm-provide-can_userfault-vma-operation.patch +++ b/resources/hiding_ci/linux_patches/25-gmem-uffd/0002-mm-provide-can_userfault-vma-operation.patch @@ -1,7 +1,7 @@ -From 7ed09f6e50ea4e4448e457a7b7712bdf3b38e826 Mon Sep 17 00:00:00 2001 +From d539c525dcb6e20c77b3921caf5a057301dc3880 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin Date: Fri, 4 Apr 2025 14:15:18 +0000 -Subject: [PATCH 22/26] mm: provide can_userfault vma operation +Subject: [PATCH 2/6] mm: provide can_userfault vma operation The new operation allows to decouple the userfaulfd code from dependencies to VMA types, specifically, shmem and hugetlb. 
The @@ -17,10 +17,10 @@ Signed-off-by: Nikita Kalyazin 3 files changed, 20 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h -index 8483e09aeb2c..488d721d8542 100644 +index 0ef2ba0c667a..72a29c83a7cd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h -@@ -680,6 +680,11 @@ struct vm_operations_struct { +@@ -653,6 +653,11 @@ struct vm_operations_struct { */ struct page *(*find_special_page)(struct vm_area_struct *vma, unsigned long addr); @@ -33,10 +33,10 @@ index 8483e09aeb2c..488d721d8542 100644 #ifdef CONFIG_NUMA_BALANCING diff --git a/mm/hugetlb.c b/mm/hugetlb.c -index c004cfdcd4e2..f3901c11e1fd 100644 +index ff9c0ffa128d..bfd78f572210 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c -@@ -5143,6 +5143,12 @@ static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma) +@@ -5467,6 +5467,12 @@ static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma) return huge_page_size(hstate_vma(vma)); } @@ -49,19 +49,19 @@ index c004cfdcd4e2..f3901c11e1fd 100644 /* * We cannot handle pagefaults against hugetlb pages at all. 
They cause * handle_mm_fault() to try to instantiate regular-sized pages in the -@@ -5168,6 +5174,7 @@ const struct vm_operations_struct hugetlb_vm_ops = { +@@ -5492,6 +5498,7 @@ const struct vm_operations_struct hugetlb_vm_ops = { .close = hugetlb_vm_op_close, .may_split = hugetlb_vm_op_split, .pagesize = hugetlb_vm_op_pagesize, + .can_userfault = hugetlb_vm_op_can_userfault, }; - static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page, + static pte_t make_huge_pte(struct vm_area_struct *vma, struct folio *folio, diff --git a/mm/shmem.c b/mm/shmem.c -index b4159303fe59..0b9e19abd1e9 100644 +index bca13ee02574..920a0fc023f1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c -@@ -2891,6 +2891,12 @@ static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, +@@ -2878,6 +2878,12 @@ static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); } @@ -74,7 +74,7 @@ index b4159303fe59..0b9e19abd1e9 100644 static struct mempolicy *shmem_get_pgoff_policy(struct shmem_inode_info *info, pgoff_t index, unsigned int order, pgoff_t *ilx) { -@@ -5309,6 +5315,7 @@ static const struct vm_operations_struct shmem_vm_ops = { +@@ -5294,6 +5300,7 @@ static const struct vm_operations_struct shmem_vm_ops = { .set_policy = shmem_set_policy, .get_policy = shmem_get_policy, #endif @@ -82,7 +82,7 @@ index b4159303fe59..0b9e19abd1e9 100644 }; static const struct vm_operations_struct shmem_anon_vm_ops = { -@@ -5318,6 +5325,7 @@ static const struct vm_operations_struct shmem_anon_vm_ops = { +@@ -5303,6 +5310,7 @@ static const struct vm_operations_struct shmem_anon_vm_ops = { .set_policy = shmem_set_policy, .get_policy = shmem_get_policy, #endif @@ -91,5 +91,5 @@ index b4159303fe59..0b9e19abd1e9 100644 int shmem_init_fs_context(struct fs_context *fc) -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/25-gmem-uffd/0023-mm-userfaultfd-use-can_userfault-vma-operation.patch 
b/resources/hiding_ci/linux_patches/25-gmem-uffd/0003-mm-userfaultfd-use-can_userfault-vma-operation.patch similarity index 88% rename from resources/hiding_ci/linux_patches/25-gmem-uffd/0023-mm-userfaultfd-use-can_userfault-vma-operation.patch rename to resources/hiding_ci/linux_patches/25-gmem-uffd/0003-mm-userfaultfd-use-can_userfault-vma-operation.patch index ce5130bb620..aaa7b5670a9 100644 --- a/resources/hiding_ci/linux_patches/25-gmem-uffd/0023-mm-userfaultfd-use-can_userfault-vma-operation.patch +++ b/resources/hiding_ci/linux_patches/25-gmem-uffd/0003-mm-userfaultfd-use-can_userfault-vma-operation.patch @@ -1,7 +1,7 @@ -From 04555059b68ba6e2aeb678da706a8290e3598df0 Mon Sep 17 00:00:00 2001 +From 1730062eb660fc23e0d9211225c240add60764e6 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin Date: Fri, 4 Apr 2025 14:16:49 +0000 -Subject: [PATCH 23/26] mm: userfaultfd: use can_userfault vma operation +Subject: [PATCH 3/6] mm: userfaultfd: use can_userfault vma operation Signed-off-by: Nikita Kalyazin --- @@ -46,10 +46,10 @@ index 75342022d144..64551e8a55fb 100644 static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c -index 4b3dbc7dac64..0aa82c968e16 100644 +index f88bb8de7fff..8d8df51615b5 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c -@@ -728,6 +728,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx, +@@ -724,6 +724,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx, unsigned long src_addr, dst_addr; long copied; struct folio *folio; @@ -57,7 +57,7 @@ index 4b3dbc7dac64..0aa82c968e16 100644 /* * Sanitize the command parameters: -@@ -787,10 +788,13 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx, +@@ -783,10 +784,13 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx, return mfill_atomic_hugetlb(ctx, dst_vma, dst_start, src_start, len, flags); @@ -75,5 +75,5 @@ index 
4b3dbc7dac64..0aa82c968e16 100644 while (src_addr < src_start + len) { -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/25-gmem-uffd/0024-KVM-guest_memfd-add-support-for-userfaultfd-minor.patch b/resources/hiding_ci/linux_patches/25-gmem-uffd/0004-KVM-guest_memfd-add-support-for-userfaultfd-minor.patch similarity index 63% rename from resources/hiding_ci/linux_patches/25-gmem-uffd/0024-KVM-guest_memfd-add-support-for-userfaultfd-minor.patch rename to resources/hiding_ci/linux_patches/25-gmem-uffd/0004-KVM-guest_memfd-add-support-for-userfaultfd-minor.patch index 37dc68e3989..27a3137b453 100644 --- a/resources/hiding_ci/linux_patches/25-gmem-uffd/0024-KVM-guest_memfd-add-support-for-userfaultfd-minor.patch +++ b/resources/hiding_ci/linux_patches/25-gmem-uffd/0004-KVM-guest_memfd-add-support-for-userfaultfd-minor.patch @@ -1,38 +1,35 @@ -From b806003684d08506cb66c664efdfda3d7ff6103e Mon Sep 17 00:00:00 2001 +From 14384ede95c5fb7292696fc43268dfe6c5e0830e Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin Date: Tue, 1 Apr 2025 15:02:56 +0000 -Subject: [PATCH 24/26] KVM: guest_memfd: add support for userfaultfd minor +Subject: [PATCH 4/6] KVM: guest_memfd: add support for userfaultfd minor Add support for sending a pagefault event if userfaultfd is registered. Only page minor event is currently supported. 
Signed-off-by: Nikita Kalyazin --- - virt/kvm/guest_memfd.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) + virt/kvm/guest_memfd.c | 7 +++++++ + 1 file changed, 7 insertions(+) diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c -index 5abb6d52a375..91ee5dd91c31 100644 +index 54ddad35bb5c..212c70b8da0c 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c -@@ -5,6 +5,9 @@ +@@ -5,6 +5,7 @@ #include #include #include -+#ifdef CONFIG_KVM_PRIVATE_MEM +#include -+#endif /* CONFIG_KVM_PRIVATE_MEM */ #include "kvm_mm.h" -@@ -396,6 +399,13 @@ static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf) +@@ -362,6 +363,12 @@ static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf) kvm_gmem_mark_prepared(folio); } + if (userfaultfd_minor(vmf->vma) && + !(vmf->flags & FAULT_FLAG_USERFAULT_CONTINUE)) { + folio_unlock(folio); -+ filemap_invalidate_unlock_shared(inode->i_mapping); + return handle_userfault(vmf, VM_UFFD_MINOR); + } + @@ -40,5 +37,5 @@ index 5abb6d52a375..91ee5dd91c31 100644 out_folio: -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/25-gmem-uffd/0025-mm-userfaultfd-add-UFFD_FEATURE_MINOR_GUEST_MEMFD.patch b/resources/hiding_ci/linux_patches/25-gmem-uffd/0005-mm-userfaultfd-add-UFFD_FEATURE_MINOR_GUEST_MEMFD.patch similarity index 88% rename from resources/hiding_ci/linux_patches/25-gmem-uffd/0025-mm-userfaultfd-add-UFFD_FEATURE_MINOR_GUEST_MEMFD.patch rename to resources/hiding_ci/linux_patches/25-gmem-uffd/0005-mm-userfaultfd-add-UFFD_FEATURE_MINOR_GUEST_MEMFD.patch index 777a2b05e66..12f976584ad 100644 --- a/resources/hiding_ci/linux_patches/25-gmem-uffd/0025-mm-userfaultfd-add-UFFD_FEATURE_MINOR_GUEST_MEMFD.patch +++ b/resources/hiding_ci/linux_patches/25-gmem-uffd/0005-mm-userfaultfd-add-UFFD_FEATURE_MINOR_GUEST_MEMFD.patch @@ -1,7 +1,7 @@ -From 6c5886204ff8d306cc4ee945235c88eb854ebf7f Mon Sep 17 00:00:00 2001 +From 6949cc0ef279aad43df07a04122d8ea073011862 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin Date: Fri, 4 Apr 
2025 14:18:03 +0000 -Subject: [PATCH 25/26] mm: userfaultfd: add UFFD_FEATURE_MINOR_GUEST_MEMFD +Subject: [PATCH 5/6] mm: userfaultfd: add UFFD_FEATURE_MINOR_GUEST_MEMFD Signed-off-by: Nikita Kalyazin --- @@ -10,10 +10,10 @@ Signed-off-by: Nikita Kalyazin 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c -index 97c4d71115d8..32152bfa462a 100644 +index 22f4bf956ba1..15175e2928d6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c -@@ -1954,7 +1954,8 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, +@@ -1969,7 +1969,8 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, uffdio_api.features = UFFD_API_FEATURES; #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR uffdio_api.features &= @@ -57,5 +57,5 @@ index 2841e4ea8f2c..ed688797eba7 100644 __u64 ioctls; -- -2.47.1 +2.49.0 diff --git a/resources/hiding_ci/linux_patches/25-gmem-uffd/0026-fixup-for-guest_memfd-uffd-v3.patch b/resources/hiding_ci/linux_patches/25-gmem-uffd/0006-fixup-for-guest_memfd-uffd-v3.patch similarity index 56% rename from resources/hiding_ci/linux_patches/25-gmem-uffd/0026-fixup-for-guest_memfd-uffd-v3.patch rename to resources/hiding_ci/linux_patches/25-gmem-uffd/0006-fixup-for-guest_memfd-uffd-v3.patch index 2aa0a3bea09..696e8c9f831 100644 --- a/resources/hiding_ci/linux_patches/25-gmem-uffd/0026-fixup-for-guest_memfd-uffd-v3.patch +++ b/resources/hiding_ci/linux_patches/25-gmem-uffd/0006-fixup-for-guest_memfd-uffd-v3.patch @@ -1,70 +1,71 @@ -From d950436a063f021ae0d925509363106625eafe0f Mon Sep 17 00:00:00 2001 +From 357ef010655d59d9d138fa176674f3c62e0e21e9 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin Date: Thu, 10 Apr 2025 14:18:53 +0000 -Subject: [PATCH 26/26] fixup for guest_memfd uffd v3 +Subject: [PATCH 6/6] fixup for guest_memfd uffd v3 - implement can_userfault for guest_memfd - check vma->vm_ops pointer before dereferencing - proper check for VM_UFFD_MINOR --- - include/linux/userfaultfd_k.h | 6 ++++-- + 
include/linux/userfaultfd_k.h | 8 +++++--- mm/userfaultfd.c | 4 +++- - virt/kvm/guest_memfd.c | 9 ++++++++- + virt/kvm/guest_memfd.c | 7 +++++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h -index 64551e8a55fb..080437fa7eab 100644 +index 64551e8a55fb..8a05a7880393 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h -@@ -221,8 +221,10 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, +@@ -221,9 +221,11 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, if (vm_flags & VM_DROPPABLE) return false; - if (!vma->vm_ops->can_userfault || - !vma->vm_ops->can_userfault(vma, VM_UFFD_MINOR)) -+ if ((vm_flags & VM_UFFD_MINOR) && -+ (!vma->vm_ops || -+ !vma->vm_ops->can_userfault || -+ !vma->vm_ops->can_userfault(vma, VM_UFFD_MINOR))) - return false; +- return false; ++ if ((vm_flags & VM_UFFD_MINOR) && ++ (!vma->vm_ops || ++ !vma->vm_ops->can_userfault || ++ !vma->vm_ops->can_userfault(vma, VM_UFFD_MINOR))) ++ return false; /* + * If wp async enabled, and WP is the only mode enabled, allow any diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c -index 0aa82c968e16..638360a78561 100644 +index 8d8df51615b5..6257e8fcbd49 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c -@@ -788,7 +788,9 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx, +@@ -784,7 +784,9 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx, return mfill_atomic_hugetlb(ctx, dst_vma, dst_start, src_start, len, flags); - can_userfault = dst_vma->vm_ops->can_userfault && -+ can_userfault = -+ dst_vma->vm_ops && -+ dst_vma->vm_ops->can_userfault && ++ can_userfault = ++ dst_vma->vm_ops && ++ dst_vma->vm_ops->can_userfault && dst_vma->vm_ops->can_userfault(dst_vma, __VM_UFFD_FLAGS); if (!vma_is_anonymous(dst_vma) && !can_userfault) diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c -index 91ee5dd91c31..202b12dc4b6f 100644 
+index 212c70b8da0c..672a3cf4796f 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c -@@ -420,8 +420,15 @@ static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf) +@@ -380,8 +380,15 @@ static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf) return ret; } +static bool kvm_gmem_can_userfault(struct vm_area_struct *vma, -+ unsigned long vm_flags) ++ unsigned long vm_flags) +{ -+ return vm_flags & VM_UFFD_MINOR; ++ return vm_flags & VM_UFFD_MINOR; +} + static const struct vm_operations_struct kvm_gmem_vm_ops = { -- .fault = kvm_gmem_fault, -+ .fault = kvm_gmem_fault, + .fault = kvm_gmem_fault_shared, + .can_userfault = kvm_gmem_can_userfault, }; static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) -- -2.47.1 +2.49.0