diff --git a/.buildkite/pipeline_pr.py b/.buildkite/pipeline_pr.py index be77a3fafd0..17c0df83d94 100755 --- a/.buildkite/pipeline_pr.py +++ b/.buildkite/pipeline_pr.py @@ -68,7 +68,9 @@ for step in kani_grp["steps"]: step["label"] = "🔍 Kani" -if not changed_files or (any(x.parent.name == "hiding_ci" for x in changed_files)): +if not changed_files or ( + any(parent.name == "hiding_ci" for x in changed_files for parent in x.parents) +): pipeline.build_group_per_arch( "🕵️ Build Secret Hiding Kernel", pipeline.devtool_test( diff --git a/resources/hiding_ci/patches/0002-mm-introduce-AS_NO_DIRECT_MAP.patch b/resources/hiding_ci/patches/0002-mm-introduce-AS_NO_DIRECT_MAP.patch new file mode 100644 index 00000000000..53dfc236022 --- /dev/null +++ b/resources/hiding_ci/patches/0002-mm-introduce-AS_NO_DIRECT_MAP.patch @@ -0,0 +1,208 @@ +From 138b7a4c83c43b42851cb8fec2bbdbaadd960241 Mon Sep 17 00:00:00 2001 +From: Patrick Roy +Date: Fri, 7 Feb 2025 11:16:06 +0000 +Subject: [PATCH 1/2] mm: introduce AS_NO_DIRECT_MAP + +Add AS_NO_DIRECT_MAP for mappings where direct map entries of folios are +set to not present. Currently, mappings that match this description are +secretmem mappings (memfd_secret()). Later, some guest_memfd +configurations will also fall into this category. + +Reject this new type of mappings in all locations that currently reject +secretmem mappings, on the assumption that if secretmem mappings are +rejected somewhere, it is precisely because of an inability to deal with +folios without direct map entries, and then make memfd_secret() use +AS_NO_DIRECT_MAP on its address_space to drop its special +vma_is_secretmem()/secretmem_mapping() checks. + +This drops an optimization in gup_fast_folio_allowed() where +secretmem_mapping() was only called if CONFIG_SECRETMEM=y. secretmem is +enabled by default since commit b758fe6df50d ("mm/secretmem: make it on +by default"), so the secretmem check did not actually end up elided in +most cases anymore anyway. 
+ +Use a new flag instead of overloading AS_INACCESSIBLE (which is already +set by guest_memfd) because not all guest_memfd mappings will end up +being direct map removed (e.g. in pKVM setups, parts of guest_memfd that +can be mapped to userspace should also be GUP-able, and generally not +have restrictions on who can access it). + +Signed-off-by: Patrick Roy +--- + include/linux/pagemap.h | 16 ++++++++++++++++ + include/linux/secretmem.h | 18 ------------------ + lib/buildid.c | 4 ++-- + mm/gup.c | 14 +++----------- + mm/mlock.c | 2 +- + mm/secretmem.c | 6 +----- + 6 files changed, 23 insertions(+), 37 deletions(-) + +diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h +index 47bfc6b1b632..903b41e89cf8 100644 +--- a/include/linux/pagemap.h ++++ b/include/linux/pagemap.h +@@ -210,6 +210,7 @@ enum mapping_flags { + AS_STABLE_WRITES = 7, /* must wait for writeback before modifying + folio contents */ + AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ ++ AS_NO_DIRECT_MAP = 9, /* Folios in the mapping are not in the direct map */ + /* Bits 16-25 are used for FOLIO_ORDER */ + AS_FOLIO_ORDER_BITS = 5, + AS_FOLIO_ORDER_MIN = 16, +@@ -335,6 +336,21 @@ static inline bool mapping_inaccessible(struct address_space *mapping) + return test_bit(AS_INACCESSIBLE, &mapping->flags); + } + ++static inline void mapping_set_no_direct_map(struct address_space *mapping) ++{ ++ set_bit(AS_NO_DIRECT_MAP, &mapping->flags); ++} ++ ++static inline bool mapping_no_direct_map(struct address_space *mapping) ++{ ++ return test_bit(AS_NO_DIRECT_MAP, &mapping->flags); ++} ++ ++static inline bool vma_is_no_direct_map(const struct vm_area_struct *vma) ++{ ++ return vma->vm_file && mapping_no_direct_map(vma->vm_file->f_mapping); ++} ++ + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) + { + return mapping->gfp_mask; +diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h +index e918f96881f5..0ae1fb057b3d 100644 +--- 
a/include/linux/secretmem.h ++++ b/include/linux/secretmem.h +@@ -4,28 +4,10 @@ + + #ifdef CONFIG_SECRETMEM + +-extern const struct address_space_operations secretmem_aops; +- +-static inline bool secretmem_mapping(struct address_space *mapping) +-{ +- return mapping->a_ops == &secretmem_aops; +-} +- +-bool vma_is_secretmem(struct vm_area_struct *vma); + bool secretmem_active(void); + + #else + +-static inline bool vma_is_secretmem(struct vm_area_struct *vma) +-{ +- return false; +-} +- +-static inline bool secretmem_mapping(struct address_space *mapping) +-{ +- return false; +-} +- + static inline bool secretmem_active(void) + { + return false; +diff --git a/lib/buildid.c b/lib/buildid.c +index c4b0f376fb34..33f173a607ad 100644 +--- a/lib/buildid.c ++++ b/lib/buildid.c +@@ -65,8 +65,8 @@ static int freader_get_folio(struct freader *r, loff_t file_off) + + freader_put_folio(r); + +- /* reject secretmem folios created with memfd_secret() */ +- if (secretmem_mapping(r->file->f_mapping)) ++ /* reject secretmem folios created with memfd_secret() or guest_memfd() */ ++ if (mapping_no_direct_map(r->file->f_mapping)) + return -EFAULT; + + r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT); +diff --git a/mm/gup.c b/mm/gup.c +index 3883b307780e..b1483a876740 100644 +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -1283,7 +1283,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) + if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma)) + return -EOPNOTSUPP; + +- if (vma_is_secretmem(vma)) ++ if (vma_is_no_direct_map(vma)) + return -EFAULT; + + if (write) { +@@ -2786,7 +2786,6 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags) + { + bool reject_file_backed = false; + struct address_space *mapping; +- bool check_secretmem = false; + unsigned long mapping_flags; + + /* +@@ -2798,14 +2797,6 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags) + reject_file_backed = true; + + /* We hold 
a folio reference, so we can safely access folio fields. */ +- +- /* secretmem folios are always order-0 folios. */ +- if (IS_ENABLED(CONFIG_SECRETMEM) && !folio_test_large(folio)) +- check_secretmem = true; +- +- if (!reject_file_backed && !check_secretmem) +- return true; +- + if (WARN_ON_ONCE(folio_test_slab(folio))) + return false; + +@@ -2847,8 +2838,9 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags) + * At this point, we know the mapping is non-null and points to an + * address_space object. + */ +- if (check_secretmem && secretmem_mapping(mapping)) ++ if (mapping_no_direct_map(mapping)) + return false; ++ + /* The only remaining allowed file system is shmem. */ + return !reject_file_backed || shmem_mapping(mapping); + } +diff --git a/mm/mlock.c b/mm/mlock.c +index cde076fa7d5e..a43f308be70d 100644 +--- a/mm/mlock.c ++++ b/mm/mlock.c +@@ -474,7 +474,7 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, + + if (newflags == oldflags || (oldflags & VM_SPECIAL) || + is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || +- vma_is_dax(vma) || vma_is_secretmem(vma) || (oldflags & VM_DROPPABLE)) ++ vma_is_dax(vma) || vma_is_no_direct_map(vma) || (oldflags & VM_DROPPABLE)) + /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ + goto out; + +diff --git a/mm/secretmem.c b/mm/secretmem.c +index 1b0a214ee558..ea4c04d469b1 100644 +--- a/mm/secretmem.c ++++ b/mm/secretmem.c +@@ -136,11 +136,6 @@ static int secretmem_mmap(struct file *file, struct vm_area_struct *vma) + return 0; + } + +-bool vma_is_secretmem(struct vm_area_struct *vma) +-{ +- return vma->vm_ops == &secretmem_vm_ops; +-} +- + static const struct file_operations secretmem_fops = { + .release = secretmem_release, + .mmap = secretmem_mmap, +@@ -214,6 +209,7 @@ static struct file *secretmem_file_create(unsigned long flags) + + mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); + mapping_set_unevictable(inode->i_mapping); ++ 
mapping_set_no_direct_map(inode->i_mapping); + + inode->i_op = &secretmem_iops; + inode->i_mapping->a_ops = &secretmem_aops; +-- +2.48.1 + diff --git a/resources/hiding_ci/patches/0003-KVM-guest_memfd-Add-flag-to-remove-from-direct-map.patch b/resources/hiding_ci/patches/0003-KVM-guest_memfd-Add-flag-to-remove-from-direct-map.patch new file mode 100644 index 00000000000..c46e04e8543 --- /dev/null +++ b/resources/hiding_ci/patches/0003-KVM-guest_memfd-Add-flag-to-remove-from-direct-map.patch @@ -0,0 +1,178 @@ +From 9bbc39f9c7622f0060d395b1063a564c24926d8d Mon Sep 17 00:00:00 2001 +From: Patrick Roy +Date: Fri, 7 Feb 2025 14:33:01 +0000 +Subject: [PATCH 2/2] KVM: guest_memfd: Add flag to remove from direct map + +Add KVM_GMEM_NO_DIRECT_MAP flag for KVM_CREATE_GUEST_MEMFD() ioctl. When +set, guest_memfd folios will be removed from the direct map after +preparation, with direct map entries only restored when the folios are +freed. + +To ensure these folios do not end up in places where the kernel cannot +deal with them, set AS_NO_DIRECT_MAP on the guest_memfd's struct +address_space if KVM_GMEM_NO_DIRECT_MAP is requested. + +Add KVM_CAP_GMEM_NO_DIRECT_MAP to let userspace discover whether +guest_memfd supports KVM_GMEM_NO_DIRECT_MAP. Support depends on +guest_memfd itself being supported, but also on whether KVM can +manipulate the direct map at page granularity at all (possible most of +the time, just arm64 is a notable outlier where its impossible if the +direct map has been setup using hugepages, as arm64 cannot break these +apart due to break-before-make semantics). + +Note that this flag causes removal of direct map entries for all +guest_memfd folios independent of whether they are "shared" or "private" +(although current guest_memfd only supports either all folios in the +"shared" state, or all folios in the "private" state if +!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM)). 
The use case for removing +direct map entries of also the shared parts of guest_memfd is a special +type of non-CoCo VM where host userspace is trusted to have access to +all of guest memory, but where Spectre-style transient execution attacks +through the host kernel's direct map should still be mitigated. + +Note that KVM retains access to guest memory via userspace +mappings of guest_memfd, which are reflected back into KVM's memslots +via userspace_addr. This is needed for things like MMIO emulation on +x86_64 to work. Previous iterations attempted to instead have KVM +temporarily restore direct map entries whenever such an access to guest +memory was needed, but this turned out to have a significant performance +impact, as well as additional complexity due to needing to refcount +direct map reinsertion operations and making them play nicely with gmem +truncations. + +This iteration also doesn't have KVM perform TLB flushes after direct +map manipulations. This is because TLB flushes resulted in an up to 40x +elongation of page faults in guest_memfd (scaling with the number of CPU +cores), or a 5x elongation of memory population. On the one hand, TLB +flushes are not needed for functional correctness (the virt->phys +mapping technically stays "correct", the kernel should simply not use it +for a while), so this is a correct optimization to make. On the other +hand, it means that the desired protection from Spectre-style attacks is +not perfect, as an attacker could try to prevent a stale TLB entry from +getting evicted, keeping it alive until the page it refers to is used by +the guest for some sensitive data, and then targeting it using a +spectre-gadget. 
+ +Signed-off-by: Patrick Roy +--- + include/uapi/linux/kvm.h | 3 +++ + virt/kvm/guest_memfd.c | 28 +++++++++++++++++++++++++++- + virt/kvm/kvm_main.c | 5 +++++ + 3 files changed, 35 insertions(+), 1 deletion(-) + +diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h +index 117937a895da..fb02a93546d8 100644 +--- a/include/uapi/linux/kvm.h ++++ b/include/uapi/linux/kvm.h +@@ -930,6 +930,7 @@ struct kvm_enable_cap { + #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 + #define KVM_CAP_X86_GUEST_MODE 238 + #define KVM_CAP_GMEM_SHARED_MEM 239 ++#define KVM_CAP_GMEM_NO_DIRECT_MAP 240 + + struct kvm_irq_routing_irqchip { + __u32 irqchip; +@@ -1573,6 +1574,8 @@ struct kvm_create_guest_memfd { + __u64 reserved[6]; + }; + ++#define KVM_GMEM_NO_DIRECT_MAP (1ULL << 0) ++ + #define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory) + + struct kvm_pre_fault_memory { +diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c +index fbf89e643add..a2b96bc51391 100644 +--- a/virt/kvm/guest_memfd.c ++++ b/virt/kvm/guest_memfd.c +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + + #include "kvm_mm.h" + +@@ -50,8 +51,23 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo + return 0; + } + ++static bool kvm_gmem_test_no_direct_map(struct inode *inode) ++{ ++ return ((unsigned long) inode->i_private) & KVM_GMEM_NO_DIRECT_MAP; ++} ++ + static inline void kvm_gmem_mark_prepared(struct folio *folio) + { ++ struct inode *inode = folio_inode(folio); ++ ++ if (kvm_gmem_test_no_direct_map(inode)) { ++ int r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio), ++ false); ++ ++ if (!r) ++ folio_set_private(folio); ++ } ++ + folio_mark_uptodate(folio); + } + +@@ -478,6 +494,10 @@ static void kvm_gmem_free_folio(struct folio *folio) + kvm_pfn_t pfn = page_to_pfn(page); + int order = folio_order(folio); + ++ if (folio_test_private(folio)) ++ WARN_ON_ONCE(set_direct_map_valid_noflush(folio_page(folio, 0), ++ 
folio_nr_pages(folio), true)); ++ + kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order)); + } + #endif +@@ -551,6 +571,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) + /* Unmovable mappings are supposed to be marked unevictable as well. */ + WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping)); + ++ if (flags & KVM_GMEM_NO_DIRECT_MAP) ++ mapping_set_no_direct_map(inode->i_mapping); ++ + kvm_get_kvm(kvm); + gmem->kvm = kvm; + xa_init(&gmem->bindings); +@@ -570,7 +593,10 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) + { + loff_t size = args->size; + u64 flags = args->flags; +- u64 valid_flags = 0; ++ u64 valid_flags = KVM_GMEM_NO_DIRECT_MAP; ++ ++ if (!can_set_direct_map()) ++ valid_flags &= ~KVM_GMEM_NO_DIRECT_MAP; + + if (flags & ~valid_flags) + return -EINVAL; +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 3e40acb9f5c0..32ca1c921ab0 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -65,6 +65,7 @@ + #include + + #include ++#include + + + /* Worst case buffer size needed for holding an integer. */ +@@ -4823,6 +4824,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) + return kvm_supported_mem_attributes(kvm); + #endif + #ifdef CONFIG_KVM_PRIVATE_MEM ++ case KVM_CAP_GMEM_NO_DIRECT_MAP: ++ if (!can_set_direct_map()) ++ return false; ++ fallthrough; + case KVM_CAP_GUEST_MEMFD: + return !kvm || kvm_arch_has_private_mem(kvm); + #endif +-- +2.48.1 +