diff --git a/.buildkite/common.py b/.buildkite/common.py
index fc74a32e65f..64ca40ba9ea 100644
--- a/.buildkite/common.py
+++ b/.buildkite/common.py
@@ -32,6 +32,7 @@
 DEFAULT_PLATFORMS = [
     ("al2", "linux_5.10"),
     ("al2023", "linux_6.1"),
+    ("al2023", "secret_hiding"),
 ]
 
 
@@ -123,10 +124,12 @@ def run_all_tests(changed_files):
     """
 
     # run the whole test suite if either of:
-    # - any file changed that is not documentation nor GitHub action config file
+    # - any file changed that is not documentation nor GitHub action config file, nor secret hiding patch series
     # - no files changed
     return not changed_files or any(
-        x.suffix != ".md" and not (x.parts[0] == ".github" and x.suffix == ".yml")
+        x.suffix != ".md"
+        and not (x.parts[0] == ".github" and x.suffix == ".yml")
+        and (len(x.parts) < 2 or x.parts[1] != "hiding_ci")
         for x in changed_files
     )
 
diff --git a/.buildkite/pipeline_perf.py b/.buildkite/pipeline_perf.py
index 66a9314f2d4..78cdc56b19f 100755
--- a/.buildkite/pipeline_perf.py
+++ b/.buildkite/pipeline_perf.py
@@ -49,6 +49,7 @@
         "label": "📸 Memory Population Latency",
         "tests": "integration_tests/performance/test_snapshot.py::test_population_latency",
         "devtool_opts": "-c 1-12 -m 0",
+        "timeout_in_minutes": 90,
     },
     "vsock-throughput": {
         "label": "🧦 Vsock Throughput",
diff --git a/.buildkite/pipeline_pr.py b/.buildkite/pipeline_pr.py
index 8744a0dcb6a..b212b8983da 100755
--- a/.buildkite/pipeline_pr.py
+++ b/.buildkite/pipeline_pr.py
@@ -70,6 +70,17 @@
     for step in kani_grp["steps"]:
         step["label"] = "🔍 Kani"
 
+if not changed_files or (
+    any(parent.name == "hiding_ci" for x in changed_files for parent in x.parents)
+):
+    pipeline.build_group_per_arch(
+        "🕵️ Build Secret Hiding Kernel",
+        pipeline.devtool_test(
+            pytest_opts="-m secret_hiding integration_tests/build/test_hiding_kernel.py",
+        ),
+        depends_on_build=False,
+    )
+
 if run_all_tests(changed_files):
     pipeline.build_group(
         "📦 Build",
diff --git a/Cargo.toml b/Cargo.toml
index a1c9ad79621..7094182bce8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,6 +30,7 @@ tests_outside_test_module = "warn"
 assertions_on_result_states = "warn"
 error_impl_error = "warn"
 or_fun_call = "warn"
+needless-update = "allow"
 
 [profile.dev]
 panic = "abort"
diff --git a/resources/hiding_ci/build_and_install_kernel.sh b/resources/hiding_ci/build_and_install_kernel.sh
new file mode 100755
index 00000000000..4b35ad08a7d
--- /dev/null
+++ b/resources/hiding_ci/build_and_install_kernel.sh
@@ -0,0 +1,240 @@
+#!/bin/bash
+# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# fail if we encounter an error, uninitialized variable or a pipe breaks
+set -eu -o pipefail
+
+check_root() {
+  # We need sudo privileges to install the kernel
+  if [ "$(id -u)" -ne 0 ]; then
+    echo "To install, this script must be run as root or with sudo privileges"
+    exit 1
+  fi
+}
+
+check_userspace() {
+  # Currently this script only works on Ubuntu and AL2023
+  if grep -qi 'ubuntu' /etc/os-release; then
+    USERSPACE="UBUNTU"
+    return 0
+  fi
+
+  if grep -qi 'al2023' /etc/os-release; then
+    USERSPACE="AL2023"
+    return 0
+  fi
+
+  echo "This script currently only works on Ubuntu and Amazon Linux 2023."
+  exit 1
+}
+
+install_build_deps() {
+  case $USERSPACE in
+    "UBUNTU")
+	    apt-get update && apt-get install -y make bsdmainutils flex yacc bison bc xz-utils libelf-dev elfutils libssl-dev
+	    ;;
+    "AL2023")
+	    yum -y groupinstall "Development Tools"
+	    yum -y install make openssl-devel dkms
+	    ;;
+  esac
+}
+
+tidy_up() {
+  # Some cleanup after we are done
+  echo "Cleaning up.."
+  cd $START_DIR
+  rm -rf $TMP_BUILD_DIR
+}
+
+confirm() {
+  if [[ "$*" == *"--no-install"* ]]; then
+    echo "Not installing new kernel."
+
+    if [[ "$*" == *"--tidy"* ]]; then
+      tidy_up
+    fi
+
+    exit 0
+  fi
+
+  if [[ "$*" == *"--install"* ]]; then
+    return 0
+  fi
+
+  while true; do
+    read -p "Do you want to install the new kernel? (y/n) " yn
+    case $yn in
+    [Yy]*) return 0 ;;
+    [Nn]*)
+      echo "Exiting..."
+      exit 1
+      ;;
+    *) echo "Please answer yes or no." ;;
+    esac
+  done
+}
+
+apply_patch_file() {
+  echo "Applying patch:" $(basename $1)
+
+  git apply $1
+}
+
+apply_patch_or_series() {
+  case "$1" in
+  *.patch) apply_patch_file $1 ;;
+  *) echo "Skipping non-patch file" $1 ;;
+  esac
+}
+
+apply_all_patches() {
+  if [ ! -d "$1" ]; then
+    echo "Not a directory: $1"
+    return
+  fi
+
+  echo "Applying all patches in $1"
+
+  for f in $1/*; do
+    if [ -d $f ]; then
+      apply_all_patches $f
+    else
+      apply_patch_or_series $f
+    fi
+  done
+}
+
+check_new_config() {
+  if [[ -e "/boot/config-$KERNEL_VERSION" ]]; then
+    return 0;
+  fi
+
+  echo "Storing new config in /boot/config-$KERNEL_VERSION"
+  cp .config /boot/config-$KERNEL_VERSION
+}
+
+check_override_presence() {
+  while IFS= read -r line; do
+    if ! grep -Fq "$line" .config; then
+      echo "Missing config: $line"
+      exit 1
+    fi
+  done <"$KERNEL_CONFIG_OVERRIDES"
+
+  echo "All overrides correctly applied.."
+}
+
+ubuntu_update_boot() {
+  echo "Update initramfs"
+  update-initramfs -c -k $KERNEL_VERSION
+  echo "Updating GRUB..."
+  update-grub
+}
+
+al2023_update_boot() {
+  echo "Installing ENA driver for AL2023"
+  $START_DIR/install_ena.sh $KERNEL_VERSION $START_DIR/dkms.conf
+
+  # Just ensure we are back in the build dir
+  cd $TMP_BUILD_DIR
+
+  echo "Creating the new ram disk"
+  dracut --kver $KERNEL_VERSION -f -v
+
+  # This varies from x86 and ARM so capture what was generated
+  # We add the || true here due to the fact that we have pipefail enabled
+  # this causes a non 0 exit when ls cant find vmlinux or vmlinux
+  VM_LINUX_LOCATION=$(ls /boot/vmlinu{x,z}-$KERNEL_VERSION 2>/dev/null | head -n1 || true)
+
+  echo "Updating GRUB..."
+  grubby --grub2 --add-kernel $VM_LINUX_LOCATION \
+    --title="Secret Hiding" \
+    --initrd=/boot/initramfs-$KERNEL_VERSION.img --copy-default
+  grubby --set-default $VM_LINUX_LOCATION
+}
+
+update_boot_config() {
+  case "$USERSPACE" in
+  UBUNTU) ubuntu_update_boot ;;
+  AL2023) al2023_update_boot ;;
+  *)
+    echo "Unknown userspace"
+    exit 1
+    ;;
+  esac
+}
+
+check_userspace
+install_build_deps
+
+KERNEL_URL=$(cat kernel_url)
+KERNEL_COMMIT_HASH=$(cat kernel_commit_hash)
+KERNEL_PATCHES_DIR=$(pwd)/linux_patches
+KERNEL_CONFIG_OVERRIDES=$(pwd)/kernel_config_overrides
+
+TMP_BUILD_DIR=$(mktemp -d -t kernel-build-XXXX)
+
+START_DIR=$(pwd)
+
+cd $TMP_BUILD_DIR
+
+echo "Cloning kernel repository into" $TMP_BUILD_DIR
+
+# We checkout the repository that way to make it as
+# small and fast as possible
+git init
+git remote add origin $KERNEL_URL
+git fetch --depth 1 origin $KERNEL_COMMIT_HASH
+git checkout FETCH_HEAD
+
+# Apply our patches on top
+apply_all_patches $KERNEL_PATCHES_DIR
+
+echo "Making kernel config ready for build"
+# We use olddefconfig to automatically pull in the
+# config from the AMI and update to the newest
+# defaults
+make olddefconfig
+
+# Disable the ubuntu keys
+scripts/config --disable SYSTEM_TRUSTED_KEYS
+scripts/config --disable SYSTEM_REVOCATION_KEYS
+
+# Apply our config overrides on top of the config
+scripts/kconfig/merge_config.sh -m .config $KERNEL_CONFIG_OVERRIDES
+
+check_override_presence
+
+# We run this again to default options now changed by
+# the disabling of the ubuntu keys
+make olddefconfig
+
+echo "Building kernel this may take a while"
+make -s -j $(nproc)
+echo "Building kernel modules"
+make modules -s -j $(nproc)
+echo "Kernel build complete!"
+
+KERNEL_VERSION=$(KERNELVERSION=$(make -s kernelversion) ./scripts/setlocalversion)
+
+echo "New kernel version:" $KERNEL_VERSION
+
+# Make sure a user really wants to install this kernel
+confirm "$@"
+
+check_root
+
+echo "Installing kernel modules..."
+make INSTALL_MOD_STRIP=1 modules_install
+echo "Installing kernel..."
+make INSTALL_MOD_STRIP=1 install
+
+update_boot_config
+
+check_new_config
+
+echo "Kernel built and installed successfully!"
+
+tidy_up
diff --git a/resources/hiding_ci/dkms.conf b/resources/hiding_ci/dkms.conf
new file mode 100644
index 00000000000..29f108ba298
--- /dev/null
+++ b/resources/hiding_ci/dkms.conf
@@ -0,0 +1,10 @@
+PACKAGE_NAME="ena"
+PACKAGE_VERSION="1.0.0"
+CLEAN="make -C kernel/linux/ena clean"
+MAKE="make -C kernel/linux/ena/ BUILD_KERNEL=${kernelver}"
+BUILT_MODULE_NAME[0]="ena"
+BUILT_MODULE_LOCATION="kernel/linux/ena"
+DEST_MODULE_LOCATION[0]="/updates"
+DEST_MODULE_NAME[0]="ena"
+REMAKE_INITRD="yes"
+AUTOINSTALL="yes"
diff --git a/resources/hiding_ci/install_ena.sh b/resources/hiding_ci/install_ena.sh
new file mode 100755
index 00000000000..7d0fd679395
--- /dev/null
+++ b/resources/hiding_ci/install_ena.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# # SPDX-License-Identifier: Apache-2.0
+
+# fail if we encounter an error, uninitialized variable or a pipe breaks
+set -eu -o pipefail
+
+AMZN_DRIVER_VERSION="2.13.3"
+KERNEL_VERSION=$1
+DKMS_CONF_LOCATION=$2
+START_DIR=$(pwd)
+
+cd /tmp/
+
+git clone --depth=1 https://github.com/amzn/amzn-drivers.git
+mv amzn-drivers /usr/src/amzn-drivers-${AMZN_DRIVER_VERSION}
+
+cp $DKMS_CONF_LOCATION /usr/src/amzn-drivers-${AMZN_DRIVER_VERSION}
+
+dkms add -m amzn-drivers -v ${AMZN_DRIVER_VERSION}
+dkms build -k ${KERNEL_VERSION} -m amzn-drivers -v ${AMZN_DRIVER_VERSION}
+dkms install -k ${KERNEL_VERSION} -m amzn-drivers -v ${AMZN_DRIVER_VERSION}
+
+cd $START_DIR
diff --git a/resources/hiding_ci/kernel_commit_hash b/resources/hiding_ci/kernel_commit_hash
new file mode 100644
index 00000000000..78e69f2ce1d
--- /dev/null
+++ b/resources/hiding_ci/kernel_commit_hash
@@ -0,0 +1 @@
+a6ad54137af92535cfe32e19e5f3bc1bb7dbd383
\ No newline at end of file
diff --git a/resources/hiding_ci/kernel_config_overrides b/resources/hiding_ci/kernel_config_overrides
new file mode 100644
index 00000000000..6cb1dd1f894
--- /dev/null
+++ b/resources/hiding_ci/kernel_config_overrides
@@ -0,0 +1,17 @@
+CONFIG_EXPERT=y
+CONFIG_CRYPTO_HW=y
+CONFIG_CRYPTO_DEV_CCP=y
+CONFIG_CRYPTO_DEV_CCP_DD=y
+CONFIG_CRYPTO_DEV_SP_PSP=y
+CONFIG_KVM=y
+CONFIG_KVM_SW_PROTECTED_VM=y
+CONFIG_KVM_AMD=y
+CONFIG_KVM_INTEL=y
+CONFIG_KVM_AMD_SEV=y
+CONFIG_KVM_PRIVATE_MEM=y
+CONFIG_KVM_GENERIC_MMU_NOTIFIER=y
+CONFIG_KVM_GENERIC_HARDWARE_ENABLING=y
+CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES=y
+CONFIG_KVM_GENERIC_PRIVATE_MEM=y
+CONFIG_DEBUG_INFO=y
+CONFIG_KVM_XEN=n
diff --git a/resources/hiding_ci/kernel_url b/resources/hiding_ci/kernel_url
new file mode 100644
index 00000000000..ce6e1a3e6a8
--- /dev/null
+++ b/resources/hiding_ci/kernel_url
@@ -0,0 +1 @@
+git://git.kernel.org/pub/scm/virt/kvm/kvm.git
diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0001-filemap-Pass-address_space-mapping-to-free_folio.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0001-filemap-Pass-address_space-mapping-to-free_folio.patch
new file mode 100644
index 00000000000..2ba864654d3
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0001-filemap-Pass-address_space-mapping-to-free_folio.patch
@@ -0,0 +1,214 @@
+From bb48d72a9b84f24ec2794b1b42b8b8192ed452d5 Mon Sep 17 00:00:00 2001
+From: Elliot Berman <quic_eberman@quicinc.com>
+Date: Fri, 22 Nov 2024 09:29:38 -0800
+Subject: [PATCH 01/10] filemap: Pass address_space mapping to ->free_folio()
+
+When guest_memfd removes memory from the host kernel's direct map,
+direct map entries must be restored before the memory is freed again. To
+do so, ->free_folio() needs to know whether a gmem folio was direct map
+removed in the first place though. While possible to keep track of this
+information on each individual folio (e.g. via page flags), direct map
+removal is an all-or-nothing property of the entire guest_memfd, so it
+is less error prone to just check the flag stored in the gmem inode's
+private data.  However, by the time ->free_folio() is called,
+folio->mapping might be cleared. To still allow access to the address
+space from which the folio was just removed, pass it in as an additional
+argument to ->free_folio, as the mapping is well-known to all callers.
+
+Link: https://lore.kernel.org/all/15f665b4-2d33-41ca-ac50-fafe24ade32f@redhat.com/
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
+[patrick: rewrite shortlog for new usecase]
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+---
+ Documentation/filesystems/locking.rst |  2 +-
+ fs/nfs/dir.c                          | 11 ++++++-----
+ fs/orangefs/inode.c                   |  3 ++-
+ include/linux/fs.h                    |  2 +-
+ mm/filemap.c                          |  9 +++++----
+ mm/secretmem.c                        |  3 ++-
+ mm/vmscan.c                           |  4 ++--
+ virt/kvm/guest_memfd.c                |  3 ++-
+ 8 files changed, 21 insertions(+), 16 deletions(-)
+
+diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
+index aa287ccdac2f..74c97287ec40 100644
+--- a/Documentation/filesystems/locking.rst
++++ b/Documentation/filesystems/locking.rst
+@@ -262,7 +262,7 @@ prototypes::
+ 	sector_t (*bmap)(struct address_space *, sector_t);
+ 	void (*invalidate_folio) (struct folio *, size_t start, size_t len);
+ 	bool (*release_folio)(struct folio *, gfp_t);
+-	void (*free_folio)(struct folio *);
++	void (*free_folio)(struct address_space *, struct folio *);
+ 	int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
+ 	int (*migrate_folio)(struct address_space *, struct folio *dst,
+ 			struct folio *src, enum migrate_mode);
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index d81217923936..644bd54e052c 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -55,7 +55,7 @@ static int nfs_closedir(struct inode *, struct file *);
+ static int nfs_readdir(struct file *, struct dir_context *);
+ static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
+ static loff_t nfs_llseek_dir(struct file *, loff_t, int);
+-static void nfs_readdir_clear_array(struct folio *);
++static void nfs_readdir_clear_array(struct address_space *, struct folio *);
+ static int nfs_do_create(struct inode *dir, struct dentry *dentry,
+ 			 umode_t mode, int open_flags);
+ 
+@@ -218,7 +218,8 @@ static void nfs_readdir_folio_init_array(struct folio *folio, u64 last_cookie,
+ /*
+  * we are freeing strings created by nfs_add_to_readdir_array()
+  */
+-static void nfs_readdir_clear_array(struct folio *folio)
++static void nfs_readdir_clear_array(struct address_space *mapping,
++				    struct folio *folio)
+ {
+ 	struct nfs_cache_array *array;
+ 	unsigned int i;
+@@ -233,7 +234,7 @@ static void nfs_readdir_clear_array(struct folio *folio)
+ static void nfs_readdir_folio_reinit_array(struct folio *folio, u64 last_cookie,
+ 					   u64 change_attr)
+ {
+-	nfs_readdir_clear_array(folio);
++	nfs_readdir_clear_array(folio->mapping, folio);
+ 	nfs_readdir_folio_init_array(folio, last_cookie, change_attr);
+ }
+ 
+@@ -249,7 +250,7 @@ nfs_readdir_folio_array_alloc(u64 last_cookie, gfp_t gfp_flags)
+ static void nfs_readdir_folio_array_free(struct folio *folio)
+ {
+ 	if (folio) {
+-		nfs_readdir_clear_array(folio);
++		nfs_readdir_clear_array(folio->mapping, folio);
+ 		folio_put(folio);
+ 	}
+ }
+@@ -391,7 +392,7 @@ static void nfs_readdir_folio_init_and_validate(struct folio *folio, u64 cookie,
+ 	if (folio_test_uptodate(folio)) {
+ 		if (nfs_readdir_folio_validate(folio, cookie, change_attr))
+ 			return;
+-		nfs_readdir_clear_array(folio);
++		nfs_readdir_clear_array(folio->mapping, folio);
+ 	}
+ 	nfs_readdir_folio_init_array(folio, cookie, change_attr);
+ 	folio_mark_uptodate(folio);
+diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
+index a01400cd41fd..37227ba71593 100644
+--- a/fs/orangefs/inode.c
++++ b/fs/orangefs/inode.c
+@@ -452,7 +452,8 @@ static bool orangefs_release_folio(struct folio *folio, gfp_t foo)
+ 	return !folio_test_private(folio);
+ }
+ 
+-static void orangefs_free_folio(struct folio *folio)
++static void orangefs_free_folio(struct address_space *mapping,
++				struct folio *folio)
+ {
+ 	kfree(folio_detach_private(folio));
+ }
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index d7ab4f96d705..afb0748ffda6 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -457,7 +457,7 @@ struct address_space_operations {
+ 	sector_t (*bmap)(struct address_space *, sector_t);
+ 	void (*invalidate_folio) (struct folio *, size_t offset, size_t len);
+ 	bool (*release_folio)(struct folio *, gfp_t);
+-	void (*free_folio)(struct folio *folio);
++	void (*free_folio)(struct address_space *, struct folio *folio);
+ 	ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
+ 	/*
+ 	 * migrate the contents of a folio to the specified target. If
+diff --git a/mm/filemap.c b/mm/filemap.c
+index 751838ef05e5..3dd8ad922d80 100644
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -226,11 +226,11 @@ void __filemap_remove_folio(struct folio *folio, void *shadow)
+ 
+ void filemap_free_folio(struct address_space *mapping, struct folio *folio)
+ {
+-	void (*free_folio)(struct folio *);
++	void (*free_folio)(struct address_space *, struct folio *);
+ 
+ 	free_folio = mapping->a_ops->free_folio;
+ 	if (free_folio)
+-		free_folio(folio);
++		free_folio(mapping, folio);
+ 
+ 	folio_put_refs(folio, folio_nr_pages(folio));
+ }
+@@ -820,7 +820,8 @@ EXPORT_SYMBOL(file_write_and_wait_range);
+ void replace_page_cache_folio(struct folio *old, struct folio *new)
+ {
+ 	struct address_space *mapping = old->mapping;
+-	void (*free_folio)(struct folio *) = mapping->a_ops->free_folio;
++	void (*free_folio)(struct address_space *, struct folio *) =
++		mapping->a_ops->free_folio;
+ 	pgoff_t offset = old->index;
+ 	XA_STATE(xas, &mapping->i_pages, offset);
+ 
+@@ -849,7 +850,7 @@ void replace_page_cache_folio(struct folio *old, struct folio *new)
+ 		__lruvec_stat_add_folio(new, NR_SHMEM);
+ 	xas_unlock_irq(&xas);
+ 	if (free_folio)
+-		free_folio(old);
++		free_folio(mapping, old);
+ 	folio_put(old);
+ }
+ EXPORT_SYMBOL_GPL(replace_page_cache_folio);
+diff --git a/mm/secretmem.c b/mm/secretmem.c
+index 60137305bc20..422dcaa32506 100644
+--- a/mm/secretmem.c
++++ b/mm/secretmem.c
+@@ -150,7 +150,8 @@ static int secretmem_migrate_folio(struct address_space *mapping,
+ 	return -EBUSY;
+ }
+ 
+-static void secretmem_free_folio(struct folio *folio)
++static void secretmem_free_folio(struct address_space *mapping,
++				 struct folio *folio)
+ {
+ 	set_direct_map_default_noflush(folio_page(folio, 0));
+ 	folio_zero_segment(folio, 0, folio_size(folio));
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index a48aec8bfd92..559bd6ac965c 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -788,7 +788,7 @@ static int __remove_mapping(struct address_space *mapping, struct folio *folio,
+ 		xa_unlock_irq(&mapping->i_pages);
+ 		put_swap_folio(folio, swap);
+ 	} else {
+-		void (*free_folio)(struct folio *);
++		void (*free_folio)(struct address_space *, struct folio *);
+ 
+ 		free_folio = mapping->a_ops->free_folio;
+ 		/*
+@@ -817,7 +817,7 @@ static int __remove_mapping(struct address_space *mapping, struct folio *folio,
+ 		spin_unlock(&mapping->host->i_lock);
+ 
+ 		if (free_folio)
+-			free_folio(folio);
++			free_folio(mapping, folio);
+ 	}
+ 
+ 	return 1;
+diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
+index 08a6bc7d25b6..9ec4c45e3cf2 100644
+--- a/virt/kvm/guest_memfd.c
++++ b/virt/kvm/guest_memfd.c
+@@ -430,7 +430,8 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
+ }
+ 
+ #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
+-static void kvm_gmem_free_folio(struct folio *folio)
++static void kvm_gmem_free_folio(struct address_space *mapping,
++				struct folio *folio)
+ {
+ 	struct page *page = folio_page(folio, 0);
+ 	kvm_pfn_t pfn = page_to_pfn(page);
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0002-arch-export-set_direct_map_valid_noflush-to-KVM-modu.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0002-arch-export-set_direct_map_valid_noflush-to-KVM-modu.patch
new file mode 100644
index 00000000000..603fb28be3c
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0002-arch-export-set_direct_map_valid_noflush-to-KVM-modu.patch
@@ -0,0 +1,83 @@
+From bac2ab6d8e85b2003df1685b5393dfb6095b4468 Mon Sep 17 00:00:00 2001
+From: Patrick Roy <roypat@amazon.co.uk>
+Date: Mon, 2 Jun 2025 12:06:10 +0100
+Subject: [PATCH 02/10] arch: export set_direct_map_valid_noflush to KVM module
+
+Use the new per-module export functionality to allow KVM (and only KVM)
+access to set_direct_map_valid_noflush(). This allows guest_memfd to
+remove its memory from the direct map, even if KVM is built as a module.
+
+Direct map removal gives guest_memfd the same protection that
+memfd_secret enjoys, such as hardening against Spectre-like attacks
+through in-kernel gadgets.
+
+Reviewed-by: Fuad Tabba <tabba@google.com>
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+---
+ arch/arm64/mm/pageattr.c     | 1 +
+ arch/loongarch/mm/pageattr.c | 1 +
+ arch/riscv/mm/pageattr.c     | 1 +
+ arch/s390/mm/pageattr.c      | 1 +
+ arch/x86/mm/pat/set_memory.c | 1 +
+ 5 files changed, 5 insertions(+)
+
+diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
+index 04d4a8f676db..4f3cddfab9b0 100644
+--- a/arch/arm64/mm/pageattr.c
++++ b/arch/arm64/mm/pageattr.c
+@@ -291,6 +291,7 @@ int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
+ 
+ 	return set_memory_valid(addr, nr, valid);
+ }
++EXPORT_SYMBOL_FOR_MODULES(set_direct_map_valid_noflush, "kvm");
+ 
+ #ifdef CONFIG_DEBUG_PAGEALLOC
+ /*
+diff --git a/arch/loongarch/mm/pageattr.c b/arch/loongarch/mm/pageattr.c
+index f5e910b68229..458f5ae6a89b 100644
+--- a/arch/loongarch/mm/pageattr.c
++++ b/arch/loongarch/mm/pageattr.c
+@@ -236,3 +236,4 @@ int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
+ 
+ 	return __set_memory(addr, 1, set, clear);
+ }
++EXPORT_SYMBOL_FOR_MODULES(set_direct_map_valid_noflush, "kvm");
+diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
+index 3f76db3d2769..6db31040cd66 100644
+--- a/arch/riscv/mm/pageattr.c
++++ b/arch/riscv/mm/pageattr.c
+@@ -400,6 +400,7 @@ int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
+ 
+ 	return __set_memory((unsigned long)page_address(page), nr, set, clear);
+ }
++EXPORT_SYMBOL_FOR_MODULES(set_direct_map_valid_noflush, "kvm");
+ 
+ #ifdef CONFIG_DEBUG_PAGEALLOC
+ static int debug_pagealloc_set_page(pte_t *pte, unsigned long addr, void *data)
+diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
+index 348e759840e7..8ffd9ef09bc6 100644
+--- a/arch/s390/mm/pageattr.c
++++ b/arch/s390/mm/pageattr.c
+@@ -413,6 +413,7 @@ int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
+ 
+ 	return __set_memory((unsigned long)page_to_virt(page), nr, flags);
+ }
++EXPORT_SYMBOL_FOR_MODULES(set_direct_map_valid_noflush, "kvm");
+ 
+ bool kernel_page_present(struct page *page)
+ {
+diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
+index 8834c76f91c9..87e9c7d2dcdc 100644
+--- a/arch/x86/mm/pat/set_memory.c
++++ b/arch/x86/mm/pat/set_memory.c
+@@ -2661,6 +2661,7 @@ int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
+ 
+ 	return __set_pages_np(page, nr);
+ }
++EXPORT_SYMBOL_FOR_MODULES(set_direct_map_valid_noflush, "kvm");
+ 
+ #ifdef CONFIG_DEBUG_PAGEALLOC
+ void __kernel_map_pages(struct page *page, int numpages, int enable)
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0003-mm-introduce-AS_NO_DIRECT_MAP.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0003-mm-introduce-AS_NO_DIRECT_MAP.patch
new file mode 100644
index 00000000000..5509d12dedc
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0003-mm-introduce-AS_NO_DIRECT_MAP.patch
@@ -0,0 +1,239 @@
+From 5f6171141c067bb8978f7176c89f5e37795baae2 Mon Sep 17 00:00:00 2001
+From: Patrick Roy <roypat@amazon.co.uk>
+Date: Fri, 7 Feb 2025 11:16:06 +0000
+Subject: [PATCH 03/10] mm: introduce AS_NO_DIRECT_MAP
+
+Add AS_NO_DIRECT_MAP for mappings where direct map entries of folios are
+set to not present . Currently, mappings that match this description are
+secretmem mappings (memfd_secret()). Later, some guest_memfd
+configurations will also fall into this category.
+
+Reject this new type of mappings in all locations that currently reject
+secretmem mappings, on the assumption that if secretmem mappings are
+rejected somewhere, it is precisely because of an inability to deal with
+folios without direct map entries, and then make memfd_secret() use
+AS_NO_DIRECT_MAP on its address_space to drop its special
+vma_is_secretmem()/secretmem_mapping() checks.
+
+This drops a optimization in gup_fast_folio_allowed() where
+secretmem_mapping() was only called if CONFIG_SECRETMEM=y. secretmem is
+enabled by default since commit b758fe6df50d ("mm/secretmem: make it on
+by default"), so the secretmem check did not actually end up elided in
+most cases anymore anyway.
+
+Use a new flag instead of overloading AS_INACCESSIBLE (which is already
+set by guest_memfd) because not all guest_memfd mappings will end up
+being direct map removed (e.g. in pKVM setups, parts of guest_memfd that
+can be mapped to userspace should also be GUP-able, and generally not
+have restrictions on who can access it).
+
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+---
+ include/linux/pagemap.h   | 16 ++++++++++++++++
+ include/linux/secretmem.h | 18 ------------------
+ lib/buildid.c             |  4 ++--
+ mm/gup.c                  | 19 +++++--------------
+ mm/mlock.c                |  2 +-
+ mm/secretmem.c            |  8 ++------
+ 6 files changed, 26 insertions(+), 41 deletions(-)
+
+diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
+index 12a12dae727d..1f5739f6a9f5 100644
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -211,6 +211,7 @@ enum mapping_flags {
+ 				   folio contents */
+ 	AS_INACCESSIBLE = 8,	/* Do not attempt direct R/W access to the mapping */
+ 	AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9,
++	AS_NO_DIRECT_MAP = 10,	/* Folios in the mapping are not in the direct map */
+ 	/* Bits 16-25 are used for FOLIO_ORDER */
+ 	AS_FOLIO_ORDER_BITS = 5,
+ 	AS_FOLIO_ORDER_MIN = 16,
+@@ -346,6 +347,21 @@ static inline bool mapping_writeback_may_deadlock_on_reclaim(struct address_spac
+ 	return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags);
+ }
+ 
++static inline void mapping_set_no_direct_map(struct address_space *mapping)
++{
++	set_bit(AS_NO_DIRECT_MAP, &mapping->flags);
++}
++
++static inline bool mapping_no_direct_map(const struct address_space *mapping)
++{
++	return test_bit(AS_NO_DIRECT_MAP, &mapping->flags);
++}
++
++static inline bool vma_has_no_direct_map(const struct vm_area_struct *vma)
++{
++	return vma->vm_file && mapping_no_direct_map(vma->vm_file->f_mapping);
++}
++
+ static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
+ {
+ 	return mapping->gfp_mask;
+diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
+index e918f96881f5..0ae1fb057b3d 100644
+--- a/include/linux/secretmem.h
++++ b/include/linux/secretmem.h
+@@ -4,28 +4,10 @@
+ 
+ #ifdef CONFIG_SECRETMEM
+ 
+-extern const struct address_space_operations secretmem_aops;
+-
+-static inline bool secretmem_mapping(struct address_space *mapping)
+-{
+-	return mapping->a_ops == &secretmem_aops;
+-}
+-
+-bool vma_is_secretmem(struct vm_area_struct *vma);
+ bool secretmem_active(void);
+ 
+ #else
+ 
+-static inline bool vma_is_secretmem(struct vm_area_struct *vma)
+-{
+-	return false;
+-}
+-
+-static inline bool secretmem_mapping(struct address_space *mapping)
+-{
+-	return false;
+-}
+-
+ static inline bool secretmem_active(void)
+ {
+ 	return false;
+diff --git a/lib/buildid.c b/lib/buildid.c
+index c4b0f376fb34..89e567954284 100644
+--- a/lib/buildid.c
++++ b/lib/buildid.c
+@@ -65,8 +65,8 @@ static int freader_get_folio(struct freader *r, loff_t file_off)
+ 
+ 	freader_put_folio(r);
+ 
+-	/* reject secretmem folios created with memfd_secret() */
+-	if (secretmem_mapping(r->file->f_mapping))
++	/* reject folios without direct map entries (e.g. from memfd_secret() or guest_memfd()) */
++	if (mapping_no_direct_map(r->file->f_mapping))
+ 		return -EFAULT;
+ 
+ 	r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT);
+diff --git a/mm/gup.c b/mm/gup.c
+index adffe663594d..75a0cffdf37d 100644
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -11,7 +11,6 @@
+ #include <linux/rmap.h>
+ #include <linux/swap.h>
+ #include <linux/swapops.h>
+-#include <linux/secretmem.h>
+ 
+ #include <linux/sched/signal.h>
+ #include <linux/rwsem.h>
+@@ -1234,7 +1233,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
+ 	if ((gup_flags & FOLL_SPLIT_PMD) && is_vm_hugetlb_page(vma))
+ 		return -EOPNOTSUPP;
+ 
+-	if (vma_is_secretmem(vma))
++	if (vma_has_no_direct_map(vma))
+ 		return -EFAULT;
+ 
+ 	if (write) {
+@@ -2736,7 +2735,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
+  * This call assumes the caller has pinned the folio, that the lowest page table
+  * level still points to this folio, and that interrupts have been disabled.
+  *
+- * GUP-fast must reject all secretmem folios.
++ * GUP-fast must reject all folios without direct map entries (such as secretmem).
+  *
+  * Writing to pinned file-backed dirty tracked folios is inherently problematic
+  * (see comment describing the writable_file_mapping_allowed() function). We
+@@ -2751,7 +2750,6 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags)
+ {
+ 	bool reject_file_backed = false;
+ 	struct address_space *mapping;
+-	bool check_secretmem = false;
+ 	unsigned long mapping_flags;
+ 
+ 	/*
+@@ -2763,18 +2761,10 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags)
+ 		reject_file_backed = true;
+ 
+ 	/* We hold a folio reference, so we can safely access folio fields. */
+-
+-	/* secretmem folios are always order-0 folios. */
+-	if (IS_ENABLED(CONFIG_SECRETMEM) && !folio_test_large(folio))
+-		check_secretmem = true;
+-
+-	if (!reject_file_backed && !check_secretmem)
+-		return true;
+-
+ 	if (WARN_ON_ONCE(folio_test_slab(folio)))
+ 		return false;
+ 
+-	/* hugetlb neither requires dirty-tracking nor can be secretmem. */
++	/* hugetlb neither requires dirty-tracking nor can be without direct map. */
+ 	if (folio_test_hugetlb(folio))
+ 		return true;
+ 
+@@ -2812,8 +2802,9 @@ static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags)
+ 	 * At this point, we know the mapping is non-null and points to an
+ 	 * address_space object.
+ 	 */
+-	if (check_secretmem && secretmem_mapping(mapping))
++	if (mapping_no_direct_map(mapping))
+ 		return false;
++
+ 	/* The only remaining allowed file system is shmem. */
+ 	return !reject_file_backed || shmem_mapping(mapping);
+ }
+diff --git a/mm/mlock.c b/mm/mlock.c
+index a1d93ad33c6d..36f5e70faeb0 100644
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -474,7 +474,7 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ 
+ 	if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
+ 	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
+-	    vma_is_dax(vma) || vma_is_secretmem(vma) || (oldflags & VM_DROPPABLE))
++	    vma_is_dax(vma) || vma_has_no_direct_map(vma) || (oldflags & VM_DROPPABLE))
+ 		/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
+ 		goto out;
+ 
+diff --git a/mm/secretmem.c b/mm/secretmem.c
+index 422dcaa32506..b5ce55079695 100644
+--- a/mm/secretmem.c
++++ b/mm/secretmem.c
+@@ -134,11 +134,6 @@ static int secretmem_mmap_prepare(struct vm_area_desc *desc)
+ 	return 0;
+ }
+ 
+-bool vma_is_secretmem(struct vm_area_struct *vma)
+-{
+-	return vma->vm_ops == &secretmem_vm_ops;
+-}
+-
+ static const struct file_operations secretmem_fops = {
+ 	.release	= secretmem_release,
+ 	.mmap_prepare	= secretmem_mmap_prepare,
+@@ -157,7 +152,7 @@ static void secretmem_free_folio(struct address_space *mapping,
+ 	folio_zero_segment(folio, 0, folio_size(folio));
+ }
+ 
+-const struct address_space_operations secretmem_aops = {
++static const struct address_space_operations secretmem_aops = {
+ 	.dirty_folio	= noop_dirty_folio,
+ 	.free_folio	= secretmem_free_folio,
+ 	.migrate_folio	= secretmem_migrate_folio,
+@@ -206,6 +201,7 @@ static struct file *secretmem_file_create(unsigned long flags)
+ 
+ 	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+ 	mapping_set_unevictable(inode->i_mapping);
++	mapping_set_no_direct_map(inode->i_mapping);
+ 
+ 	inode->i_op = &secretmem_iops;
+ 	inode->i_mapping->a_ops = &secretmem_aops;
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0004-KVM-guest_memfd-Add-flag-to-remove-from-direct-map.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0004-KVM-guest_memfd-Add-flag-to-remove-from-direct-map.patch
new file mode 100644
index 00000000000..dc5b78afb59
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0004-KVM-guest_memfd-Add-flag-to-remove-from-direct-map.patch
@@ -0,0 +1,308 @@
+From 01ed00298e296f373f3b8e7659b634196a966442 Mon Sep 17 00:00:00 2001
+From: Patrick Roy <roypat@amazon.co.uk>
+Date: Fri, 7 Feb 2025 14:33:01 +0000
+Subject: [PATCH 04/10] KVM: guest_memfd: Add flag to remove from direct map
+
+Add GUEST_MEMFD_FLAG_NO_DIRECT_MAP flag for KVM_CREATE_GUEST_MEMFD()
+ioctl. When set, guest_memfd folios will be removed from the direct map
+after preparation, with direct map entries only restored when the folios
+are freed.
+
+To ensure these folios do not end up in places where the kernel cannot
+deal with them, set AS_NO_DIRECT_MAP on the guest_memfd's struct
+address_space if GUEST_MEMFD_FLAG_NO_DIRECT_MAP is requested.
+
+Add KVM_CAP_GUEST_MEMFD_NO_DIRECT_MAP to let userspace discover whether
+guest_memfd supports GUEST_MEMFD_FLAG_NO_DIRECT_MAP. Support depends on
+guest_memfd itself being supported, but also on whether linux supports
+manipulatomg the direct map at page granularity at all (possible most of
+the time, outliers being arm64 where its impossible if the direct map
+has been setup using hugepages, as arm64 cannot break these apart due to
+break-before-make semantics, and powerpc, which does not select
+ARCH_HAS_SET_DIRECT_MAP, which also doesn't support guest_memfd anyway
+though).
+
+Note that this flag causes removal of direct map entries for all
+guest_memfd folios independent of whether they are "shared" or "private"
+(although current guest_memfd only supports either all folios in the
+"shared" state, or all folios in the "private" state if
+GUEST_MEMFD_FLAG_MMAP is not set). The usecase for removing direct map
+entries of also the shared parts of guest_memfd are a special type of
+non-CoCo VM where, host userspace is trusted to have access to all of
+guest memory, but where Spectre-style transient execution attacks
+through the host kernel's direct map should still be mitigated.  In this
+setup, KVM retains access to guest memory via userspace mappings of
+guest_memfd, which are reflected back into KVM's memslots via
+userspace_addr. This is needed for things like MMIO emulation on x86_64
+to work.
+
+Do not perform TLB flushes after direct map manipulations. This is
+because TLB flushes resulted in a up to 40x elongation of page faults in
+guest_memfd (scaling with the number of CPU cores), or a 5x elongation
+of memory population. TLB flushes are not needed for functional
+correctness (the virt->phys mapping technically stays "correct",  the
+kernel should simply not use it for a while). On the other hand, it means
+that the desired protection from Spectre-style attacks is not perfect,
+as an attacker could try to prevent a stale TLB entry from getting
+evicted, keeping it alive until the page it refers to is used by the
+guest for some sensitive data, and then targeting it using a
+spectre-gadget.
+
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+---
+ Documentation/virt/kvm/api.rst    |  5 ++++
+ arch/arm64/include/asm/kvm_host.h | 12 ++++++++
+ include/linux/kvm_host.h          |  9 ++++++
+ include/uapi/linux/kvm.h          |  2 ++
+ virt/kvm/guest_memfd.c            | 46 +++++++++++++++++++++++++------
+ virt/kvm/kvm_main.c               |  5 ++++
+ 6 files changed, 70 insertions(+), 9 deletions(-)
+
+diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
+index c17a87a0a5ac..b52c14d58798 100644
+--- a/Documentation/virt/kvm/api.rst
++++ b/Documentation/virt/kvm/api.rst
+@@ -6418,6 +6418,11 @@ When the capability KVM_CAP_GUEST_MEMFD_MMAP is supported, the 'flags' field
+ supports GUEST_MEMFD_FLAG_MMAP.  Setting this flag on guest_memfd creation
+ enables mmap() and faulting of guest_memfd memory to host userspace.
+ 
++When the capability KVM_CAP_GMEM_NO_DIRECT_MAP is supported, the 'flags' field
++supports GUEST_MEMFG_FLAG_NO_DIRECT_MAP. Setting this flag makes the guest_memfd
++instance behave similarly to memfd_secret, and unmaps the memory backing it from
++the kernel's address space after allocation.
++
+ When the KVM MMU performs a PFN lookup to service a guest fault and the backing
+ guest_memfd has the GUEST_MEMFD_FLAG_MMAP set, then the fault will always be
+ consumed from guest_memfd, regardless of whether it is a shared or a private
+diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
+index 2f2394cce24e..0bfd8e5fd9de 100644
+--- a/arch/arm64/include/asm/kvm_host.h
++++ b/arch/arm64/include/asm/kvm_host.h
+@@ -19,6 +19,7 @@
+ #include <linux/maple_tree.h>
+ #include <linux/percpu.h>
+ #include <linux/psci.h>
++#include <linux/set_memory.h>
+ #include <asm/arch_gicv3.h>
+ #include <asm/barrier.h>
+ #include <asm/cpufeature.h>
+@@ -1706,5 +1707,16 @@ void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt);
+ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1);
+ void check_feature_map(void);
+ 
++#ifdef CONFIG_KVM_GUEST_MEMFD
++static inline bool kvm_arch_gmem_supports_no_direct_map(void)
++{
++	/*
++	 * Without FWB, direct map access is needed in kvm_pgtable_stage2_map(),
++	 * as it calls dcache_clean_inval_poc().
++	 */
++	return can_set_direct_map() && cpus_have_final_cap(ARM64_HAS_STAGE2_FWB);
++}
++#define kvm_arch_gmem_supports_no_direct_map kvm_arch_gmem_supports_no_direct_map
++#endif /* CONFIG_KVM_GUEST_MEMFD */
+ 
+ #endif /* __ARM64_KVM_HOST_H__ */
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index 8b47891adca1..a9468bce55f2 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -36,6 +36,7 @@
+ #include <linux/rbtree.h>
+ #include <linux/xarray.h>
+ #include <asm/signal.h>
++#include <linux/set_memory.h>
+ 
+ #include <linux/kvm.h>
+ #include <linux/kvm_para.h>
+@@ -731,6 +732,12 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
+ bool kvm_arch_supports_gmem_mmap(struct kvm *kvm);
+ #endif
+ 
++#ifdef CONFIG_KVM_GUEST_MEMFD
++#ifndef kvm_arch_gmem_supports_no_direct_map
++#define kvm_arch_gmem_supports_no_direct_map can_set_direct_map
++#endif
++#endif /* CONFIG_KVM_GUEST_MEMFD */
++
+ #ifndef kvm_arch_has_readonly_mem
+ static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
+ {
+@@ -2573,6 +2580,8 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages
+ 
+ #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
+ void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
++#else
++static inline void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) { }
+ #endif
+ 
+ #ifdef CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY
+diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
+index 6efa98a57ec1..33c8e8946019 100644
+--- a/include/uapi/linux/kvm.h
++++ b/include/uapi/linux/kvm.h
+@@ -963,6 +963,7 @@ struct kvm_enable_cap {
+ #define KVM_CAP_RISCV_MP_STATE_RESET 242
+ #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
+ #define KVM_CAP_GUEST_MEMFD_MMAP 244
++#define KVM_CAP_GUEST_MEMFD_NO_DIRECT_MAP 245
+ 
+ struct kvm_irq_routing_irqchip {
+ 	__u32 irqchip;
+@@ -1600,6 +1601,7 @@ struct kvm_memory_attributes {
+ 
+ #define KVM_CREATE_GUEST_MEMFD	_IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
+ #define GUEST_MEMFD_FLAG_MMAP	(1ULL << 0)
++#define GUEST_MEMFD_FLAG_NO_DIRECT_MAP (1ULL << 1)
+ 
+ struct kvm_create_guest_memfd {
+ 	__u64 size;
+diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
+index 9ec4c45e3cf2..20217332dcd1 100644
+--- a/virt/kvm/guest_memfd.c
++++ b/virt/kvm/guest_memfd.c
+@@ -4,6 +4,7 @@
+ #include <linux/kvm_host.h>
+ #include <linux/pagemap.h>
+ #include <linux/anon_inodes.h>
++#include <linux/set_memory.h>
+ 
+ #include "kvm_mm.h"
+ 
+@@ -42,9 +43,24 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo
+ 	return 0;
+ }
+ 
+-static inline void kvm_gmem_mark_prepared(struct folio *folio)
++static bool kvm_gmem_test_no_direct_map(struct inode *inode)
+ {
+-	folio_mark_uptodate(folio);
++	return ((unsigned long) inode->i_private) & GUEST_MEMFD_FLAG_NO_DIRECT_MAP;
++}
++
++static inline int kvm_gmem_mark_prepared(struct folio *folio)
++{
++	struct inode *inode = folio_inode(folio);
++	int r = 0;
++
++	if (kvm_gmem_test_no_direct_map(inode))
++		r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
++						 false);
++
++	if (!r)
++		folio_mark_uptodate(folio);
++
++	return r;
+ }
+ 
+ /*
+@@ -82,7 +98,7 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
+ 	index = ALIGN_DOWN(index, 1 << folio_order(folio));
+ 	r = __kvm_gmem_prepare_folio(kvm, slot, index, folio);
+ 	if (!r)
+-		kvm_gmem_mark_prepared(folio);
++		r = kvm_gmem_mark_prepared(folio);
+ 
+ 	return r;
+ }
+@@ -344,8 +360,15 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
+ 	}
+ 
+ 	if (!folio_test_uptodate(folio)) {
++		int err = 0;
++
+ 		clear_highpage(folio_page(folio, 0));
+-		kvm_gmem_mark_prepared(folio);
++		err = kvm_gmem_mark_prepared(folio);
++
++		if (err) {
++			ret = vmf_error(err);
++			goto out_folio;
++		}
+ 	}
+ 
+ 	vmf->page = folio_file_page(folio, vmf->pgoff);
+@@ -429,7 +452,6 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
+ 	return MF_DELAYED;
+ }
+ 
+-#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
+ static void kvm_gmem_free_folio(struct address_space *mapping,
+ 				struct folio *folio)
+ {
+@@ -437,17 +459,17 @@ static void kvm_gmem_free_folio(struct address_space *mapping,
+ 	kvm_pfn_t pfn = page_to_pfn(page);
+ 	int order = folio_order(folio);
+ 
++	if (kvm_gmem_test_no_direct_map(mapping->host))
++		WARN_ON_ONCE(set_direct_map_valid_noflush(page, folio_nr_pages(folio), true));
++
+ 	kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
+ }
+-#endif
+ 
+ static const struct address_space_operations kvm_gmem_aops = {
+ 	.dirty_folio = noop_dirty_folio,
+ 	.migrate_folio	= kvm_gmem_migrate_folio,
+ 	.error_remove_folio = kvm_gmem_error_folio,
+-#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
+ 	.free_folio = kvm_gmem_free_folio,
+-#endif
+ };
+ 
+ static int kvm_gmem_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+@@ -504,6 +526,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
+ 	/* Unmovable mappings are supposed to be marked unevictable as well. */
+ 	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
+ 
++	if (flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP)
++		mapping_set_no_direct_map(inode->i_mapping);
++
+ 	kvm_get_kvm(kvm);
+ 	gmem->kvm = kvm;
+ 	xa_init(&gmem->bindings);
+@@ -528,6 +553,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
+ 	if (kvm_arch_supports_gmem_mmap(kvm))
+ 		valid_flags |= GUEST_MEMFD_FLAG_MMAP;
+ 
++	if (kvm_arch_gmem_supports_no_direct_map())
++		valid_flags |= GUEST_MEMFD_FLAG_NO_DIRECT_MAP;
++
+ 	if (flags & ~valid_flags)
+ 		return -EINVAL;
+ 
+@@ -772,7 +800,7 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
+ 		p = src ? src + i * PAGE_SIZE : NULL;
+ 		ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
+ 		if (!ret)
+-			kvm_gmem_mark_prepared(folio);
++			ret = kvm_gmem_mark_prepared(folio);
+ 
+ put_folio_and_exit:
+ 		folio_put(folio);
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 18f29ef93543..6133bab21ab8 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -65,6 +65,7 @@
+ #include <trace/events/kvm.h>
+ 
+ #include <linux/kvm_dirty_ring.h>
++#include <linux/set_memory.h>
+ 
+ 
+ /* Worst case buffer size needed for holding an integer. */
+@@ -4916,6 +4917,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
+ 		return kvm_supported_mem_attributes(kvm);
+ #endif
+ #ifdef CONFIG_KVM_GUEST_MEMFD
++	case KVM_CAP_GUEST_MEMFD_NO_DIRECT_MAP:
++		if (!kvm_arch_gmem_supports_no_direct_map())
++			return false;
++		fallthrough;
+ 	case KVM_CAP_GUEST_MEMFD:
+ 		return 1;
+ 	case KVM_CAP_GUEST_MEMFD_MMAP:
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0005-KVM-selftests-load-elf-via-bounce-buffer.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0005-KVM-selftests-load-elf-via-bounce-buffer.patch
new file mode 100644
index 00000000000..7149695d38b
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0005-KVM-selftests-load-elf-via-bounce-buffer.patch
@@ -0,0 +1,105 @@
+From 6823519f9f720b947dff39b33f6e59b91b2c7d03 Mon Sep 17 00:00:00 2001
+From: Patrick Roy <roypat@amazon.co.uk>
+Date: Fri, 21 Feb 2025 09:00:45 +0000
+Subject: [PATCH 05/10] KVM: selftests: load elf via bounce buffer
+
+If guest memory is backed using a VMA that does not allow GUP (e.g. a
+userspace mapping of guest_memfd when the fd was allocated using
+KVM_GMEM_NO_DIRECT_MAP), then directly loading the test ELF binary into
+it via read(2) potentially does not work. To nevertheless support
+loading binaries in this cases, do the read(2) syscall using a bounce
+buffer, and then memcpy from the bounce buffer into guest memory.
+
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+---
+ .../testing/selftests/kvm/include/test_util.h |  1 +
+ tools/testing/selftests/kvm/lib/elf.c         |  8 +++----
+ tools/testing/selftests/kvm/lib/io.c          | 23 +++++++++++++++++++
+ 3 files changed, 28 insertions(+), 4 deletions(-)
+
+diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
+index c6ef895fbd9a..0409b7b96c94 100644
+--- a/tools/testing/selftests/kvm/include/test_util.h
++++ b/tools/testing/selftests/kvm/include/test_util.h
+@@ -46,6 +46,7 @@ do {								\
+ 
+ ssize_t test_write(int fd, const void *buf, size_t count);
+ ssize_t test_read(int fd, void *buf, size_t count);
++ssize_t test_read_bounce(int fd, void *buf, size_t count);
+ int test_seq_read(const char *path, char **bufp, size_t *sizep);
+ 
+ void __printf(5, 6) test_assert(bool exp, const char *exp_str,
+diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
+index f34d926d9735..e829fbe0a11e 100644
+--- a/tools/testing/selftests/kvm/lib/elf.c
++++ b/tools/testing/selftests/kvm/lib/elf.c
+@@ -31,7 +31,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
+ 	 * the real size of the ELF header.
+ 	 */
+ 	unsigned char ident[EI_NIDENT];
+-	test_read(fd, ident, sizeof(ident));
++	test_read_bounce(fd, ident, sizeof(ident));
+ 	TEST_ASSERT((ident[EI_MAG0] == ELFMAG0) && (ident[EI_MAG1] == ELFMAG1)
+ 		&& (ident[EI_MAG2] == ELFMAG2) && (ident[EI_MAG3] == ELFMAG3),
+ 		"ELF MAGIC Mismatch,\n"
+@@ -79,7 +79,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
+ 	offset_rv = lseek(fd, 0, SEEK_SET);
+ 	TEST_ASSERT(offset_rv == 0, "Seek to ELF header failed,\n"
+ 		"  rv: %zi expected: %i", offset_rv, 0);
+-	test_read(fd, hdrp, sizeof(*hdrp));
++	test_read_bounce(fd, hdrp, sizeof(*hdrp));
+ 	TEST_ASSERT(hdrp->e_phentsize == sizeof(Elf64_Phdr),
+ 		"Unexpected physical header size,\n"
+ 		"  hdrp->e_phentsize: %x\n"
+@@ -146,7 +146,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
+ 
+ 		/* Read in the program header. */
+ 		Elf64_Phdr phdr;
+-		test_read(fd, &phdr, sizeof(phdr));
++		test_read_bounce(fd, &phdr, sizeof(phdr));
+ 
+ 		/* Skip if this header doesn't describe a loadable segment. */
+ 		if (phdr.p_type != PT_LOAD)
+@@ -187,7 +187,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
+ 				"  expected: 0x%jx",
+ 				n1, errno, (intmax_t) offset_rv,
+ 				(intmax_t) phdr.p_offset);
+-			test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
++			test_read_bounce(fd, addr_gva2hva(vm, phdr.p_vaddr),
+ 				phdr.p_filesz);
+ 		}
+ 	}
+diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c
+index fedb2a741f0b..74419becc8bc 100644
+--- a/tools/testing/selftests/kvm/lib/io.c
++++ b/tools/testing/selftests/kvm/lib/io.c
+@@ -155,3 +155,26 @@ ssize_t test_read(int fd, void *buf, size_t count)
+ 
+ 	return num_read;
+ }
++
++/* Test read via intermediary buffer
++ *
++ * Same as test_read, except read(2)s happen into a bounce buffer that is memcpy'd
++ * to buf. For use with buffers that cannot be GUP'd (e.g. guest_memfd VMAs if
++ * guest_memfd was created with GUEST_MEMFD_FLAG_NO_DIRECT_MAP).
++ */
++ssize_t test_read_bounce(int fd, void *buf, size_t count)
++{
++	void *bounce_buffer;
++	ssize_t num_read;
++
++	TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count);
++
++	bounce_buffer = malloc(count);
++	TEST_ASSERT(bounce_buffer != NULL, "Failed to allocate bounce buffer");
++
++	num_read = test_read(fd, bounce_buffer, count);
++	memcpy(buf, bounce_buffer, num_read);
++	free(bounce_buffer);
++
++	return num_read;
++}
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0006-KVM-selftests-set-KVM_MEM_GUEST_MEMFD-in-vm_mem_add-.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0006-KVM-selftests-set-KVM_MEM_GUEST_MEMFD-in-vm_mem_add-.patch
new file mode 100644
index 00000000000..151686be060
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0006-KVM-selftests-set-KVM_MEM_GUEST_MEMFD-in-vm_mem_add-.patch
@@ -0,0 +1,71 @@
+From 27c849319c2eb4ba66b64478709a880fc12e93e4 Mon Sep 17 00:00:00 2001
+From: Patrick Roy <roypat@amazon.co.uk>
+Date: Thu, 20 Feb 2025 14:56:20 +0000
+Subject: [PATCH 06/10] KVM: selftests: set KVM_MEM_GUEST_MEMFD in vm_mem_add()
+ if guest_memfd != -1
+
+Have vm_mem_add() always set KVM_MEM_GUEST_MEMFD in the memslot flags if
+a guest_memfd is passed in as an argument. This eliminates the
+possibility where a guest_memfd instance is passed to vm_mem_add(), but
+it ends up being ignored because the flags argument does not specify
+KVM_MEM_GUEST_MEMFD at the same time.
+
+This makes it easy to support more scenarios in which no vm_mem_add() is
+not passed a guest_memfd instance, but is expected to allocate one.
+Currently, this only happens if guest_memfd == -1 but flags &
+KVM_MEM_GUEST_MEMFD != 0, but later vm_mem_add() will gain support for
+loading the test code itself into guest_memfd (via
+GUEST_MEMFD_FLAG_MMAP) if requested via a special
+vm_mem_backing_src_type, at which point having to make sure the src_type
+and flags are in-sync becomes cumbersome.
+
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+---
+ tools/testing/selftests/kvm/lib/kvm_util.c | 26 +++++++++++++---------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
+index c3f5142b0a54..cc67dfecbf65 100644
+--- a/tools/testing/selftests/kvm/lib/kvm_util.c
++++ b/tools/testing/selftests/kvm/lib/kvm_util.c
+@@ -1107,22 +1107,26 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ 
+ 	region->backing_src_type = src_type;
+ 
+-	if (flags & KVM_MEM_GUEST_MEMFD) {
+-		if (guest_memfd < 0) {
++	if (guest_memfd < 0) {
++		if (flags & KVM_MEM_GUEST_MEMFD) {
+ 			uint32_t guest_memfd_flags = 0;
+ 			TEST_ASSERT(!guest_memfd_offset,
+ 				    "Offset must be zero when creating new guest_memfd");
+ 			guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
+-		} else {
+-			/*
+-			 * Install a unique fd for each memslot so that the fd
+-			 * can be closed when the region is deleted without
+-			 * needing to track if the fd is owned by the framework
+-			 * or by the caller.
+-			 */
+-			guest_memfd = dup(guest_memfd);
+-			TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
+ 		}
++	} else {
++		/*
++		 * Install a unique fd for each memslot so that the fd
++		 * can be closed when the region is deleted without
++		 * needing to track if the fd is owned by the framework
++		 * or by the caller.
++		 */
++		guest_memfd = dup(guest_memfd);
++		TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
++	}
++
++	if (guest_memfd > 0) {
++		flags |= KVM_MEM_GUEST_MEMFD;
+ 
+ 		region->region.guest_memfd = guest_memfd;
+ 		region->region.guest_memfd_offset = guest_memfd_offset;
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0007-KVM-selftests-Add-guest_memfd-based-vm_mem_backing_s.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0007-KVM-selftests-Add-guest_memfd-based-vm_mem_backing_s.patch
new file mode 100644
index 00000000000..0a42b910784
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0007-KVM-selftests-Add-guest_memfd-based-vm_mem_backing_s.patch
@@ -0,0 +1,190 @@
+From 87fbe3433945bd5dfb9965d9ede56cdbad587040 Mon Sep 17 00:00:00 2001
+From: Patrick Roy <roypat@amazon.co.uk>
+Date: Thu, 20 Feb 2025 11:08:22 +0000
+Subject: [PATCH 07/10] KVM: selftests: Add guest_memfd based
+ vm_mem_backing_src_types
+
+Allow selftests to configure their memslots such that userspace_addr is
+set to a MAP_SHARED mapping of the guest_memfd that's associated with
+the memslot. This setup is the configuration for non-CoCo VMs, where all
+guest memory is backed by a guest_memfd whose folios are all marked
+shared, but KVM is still able to access guest memory to provide
+functionality such as MMIO emulation on x86.
+
+Add backing types for normal guest_memfd, as well as direct map removed
+guest_memfd.
+
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+---
+ .../testing/selftests/kvm/include/kvm_util.h  | 18 ++++++
+ .../testing/selftests/kvm/include/test_util.h |  7 +++
+ tools/testing/selftests/kvm/lib/kvm_util.c    | 63 ++++++++++---------
+ tools/testing/selftests/kvm/lib/test_util.c   |  8 +++
+ 4 files changed, 66 insertions(+), 30 deletions(-)
+
+diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
+index 23a506d7eca3..5204a0a18a7f 100644
+--- a/tools/testing/selftests/kvm/include/kvm_util.h
++++ b/tools/testing/selftests/kvm/include/kvm_util.h
+@@ -635,6 +635,24 @@ static inline bool is_smt_on(void)
+ 
+ void vm_create_irqchip(struct kvm_vm *vm);
+ 
++static inline uint32_t backing_src_guest_memfd_flags(enum vm_mem_backing_src_type t)
++{
++	uint32_t flags = 0;
++
++	switch (t) {
++	case VM_MEM_SRC_GUEST_MEMFD:
++		flags |= GUEST_MEMFD_FLAG_MMAP;
++		fallthrough;
++	case VM_MEM_SRC_GUEST_MEMFD_NO_DIRECT_MAP:
++		flags |= GUEST_MEMFD_FLAG_NO_DIRECT_MAP;
++		break;
++	default:
++		break;
++	}
++
++	return flags;
++}
++
+ static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
+ 					uint64_t flags)
+ {
+diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
+index 0409b7b96c94..a56e53fc7b39 100644
+--- a/tools/testing/selftests/kvm/include/test_util.h
++++ b/tools/testing/selftests/kvm/include/test_util.h
+@@ -133,6 +133,8 @@ enum vm_mem_backing_src_type {
+ 	VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB,
+ 	VM_MEM_SRC_SHMEM,
+ 	VM_MEM_SRC_SHARED_HUGETLB,
++	VM_MEM_SRC_GUEST_MEMFD,
++	VM_MEM_SRC_GUEST_MEMFD_NO_DIRECT_MAP,
+ 	NUM_SRC_TYPES,
+ };
+ 
+@@ -165,6 +167,11 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
+ 	return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
+ }
+ 
++static inline bool backing_src_is_guest_memfd(enum vm_mem_backing_src_type t)
++{
++	return t == VM_MEM_SRC_GUEST_MEMFD || t == VM_MEM_SRC_GUEST_MEMFD_NO_DIRECT_MAP;
++}
++
+ static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t)
+ {
+ 	return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM;
+diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
+index cc67dfecbf65..a81089f7c83f 100644
+--- a/tools/testing/selftests/kvm/lib/kvm_util.c
++++ b/tools/testing/selftests/kvm/lib/kvm_util.c
+@@ -1060,6 +1060,34 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ 	alignment = 1;
+ #endif
+ 
++	if (guest_memfd < 0) {
++		if ((flags & KVM_MEM_GUEST_MEMFD) || backing_src_is_guest_memfd(src_type)) {
++			uint32_t guest_memfd_flags = backing_src_guest_memfd_flags(src_type);
++
++			TEST_ASSERT(!guest_memfd_offset,
++				    "Offset must be zero when creating new guest_memfd");
++			guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
++		}
++	} else {
++		/*
++		 * Install a unique fd for each memslot so that the fd
++		 * can be closed when the region is deleted without
++		 * needing to track if the fd is owned by the framework
++		 * or by the caller.
++		 */
++		guest_memfd = dup(guest_memfd);
++		TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
++	}
++
++	if (guest_memfd > 0) {
++		flags |= KVM_MEM_GUEST_MEMFD;
++
++		region->region.guest_memfd = guest_memfd;
++		region->region.guest_memfd_offset = guest_memfd_offset;
++	} else {
++		region->region.guest_memfd = -1;
++	}
++
+ 	/*
+ 	 * When using THP mmap is not guaranteed to returned a hugepage aligned
+ 	 * address so we have to pad the mmap. Padding is not needed for HugeTLB
+@@ -1075,10 +1103,13 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ 	if (alignment > 1)
+ 		region->mmap_size += alignment;
+ 
+-	region->fd = -1;
+-	if (backing_src_is_shared(src_type))
++	if (backing_src_is_guest_memfd(src_type))
++		region->fd = guest_memfd;
++	else if (backing_src_is_shared(src_type))
+ 		region->fd = kvm_memfd_alloc(region->mmap_size,
+ 					     src_type == VM_MEM_SRC_SHARED_HUGETLB);
++	else
++		region->fd = -1;
+ 
+ 	region->mmap_start = mmap(NULL, region->mmap_size,
+ 				  PROT_READ | PROT_WRITE,
+@@ -1106,34 +1137,6 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ 	}
+ 
+ 	region->backing_src_type = src_type;
+-
+-	if (guest_memfd < 0) {
+-		if (flags & KVM_MEM_GUEST_MEMFD) {
+-			uint32_t guest_memfd_flags = 0;
+-			TEST_ASSERT(!guest_memfd_offset,
+-				    "Offset must be zero when creating new guest_memfd");
+-			guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
+-		}
+-	} else {
+-		/*
+-		 * Install a unique fd for each memslot so that the fd
+-		 * can be closed when the region is deleted without
+-		 * needing to track if the fd is owned by the framework
+-		 * or by the caller.
+-		 */
+-		guest_memfd = dup(guest_memfd);
+-		TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
+-	}
+-
+-	if (guest_memfd > 0) {
+-		flags |= KVM_MEM_GUEST_MEMFD;
+-
+-		region->region.guest_memfd = guest_memfd;
+-		region->region.guest_memfd_offset = guest_memfd_offset;
+-	} else {
+-		region->region.guest_memfd = -1;
+-	}
+-
+ 	region->unused_phy_pages = sparsebit_alloc();
+ 	if (vm_arch_has_protected_memory(vm))
+ 		region->protected_phy_pages = sparsebit_alloc();
+diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
+index 03eb99af9b8d..b2baee680083 100644
+--- a/tools/testing/selftests/kvm/lib/test_util.c
++++ b/tools/testing/selftests/kvm/lib/test_util.c
+@@ -299,6 +299,14 @@ const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
+ 			 */
+ 			.flag = MAP_SHARED,
+ 		},
++		[VM_MEM_SRC_GUEST_MEMFD] = {
++			.name = "guest_memfd",
++			.flag = MAP_SHARED,
++		},
++		[VM_MEM_SRC_GUEST_MEMFD_NO_DIRECT_MAP] = {
++			.name = "guest_memfd_no_direct_map",
++			.flag = MAP_SHARED,
++		}
+ 	};
+ 	_Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES,
+ 		       "Missing new backing src types?");
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0008-KVM-selftests-stuff-vm_mem_backing_src_type-into-vm_.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0008-KVM-selftests-stuff-vm_mem_backing_src_type-into-vm_.patch
new file mode 100644
index 00000000000..2487af32895
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0008-KVM-selftests-stuff-vm_mem_backing_src_type-into-vm_.patch
@@ -0,0 +1,98 @@
+From c0abd503fb650d6f99b1d2f247fc94fb392242bd Mon Sep 17 00:00:00 2001
+From: Patrick Roy <roypat@amazon.co.uk>
+Date: Thu, 20 Feb 2025 13:46:01 +0000
+Subject: [PATCH 08/10] KVM: selftests: stuff vm_mem_backing_src_type into
+ vm_shape
+
+Use one of the padding fields in struct vm_shape to carry an enum
+vm_mem_backing_src_type value, to give the option to overwrite the
+default of VM_MEM_SRC_ANONYMOUS in __vm_create().
+
+Overwriting this default will allow tests to create VMs where the test
+code is backed by mmap'd guest_memfd instead of anonymous memory.
+
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+---
+ .../testing/selftests/kvm/include/kvm_util.h  | 19 ++++++++++---------
+ tools/testing/selftests/kvm/lib/kvm_util.c    |  2 +-
+ tools/testing/selftests/kvm/lib/x86/sev.c     |  1 +
+ .../selftests/kvm/pre_fault_memory_test.c     |  1 +
+ 4 files changed, 13 insertions(+), 10 deletions(-)
+
+diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
+index 5204a0a18a7f..8baa0bbacd09 100644
+--- a/tools/testing/selftests/kvm/include/kvm_util.h
++++ b/tools/testing/selftests/kvm/include/kvm_util.h
+@@ -188,7 +188,7 @@ enum vm_guest_mode {
+ struct vm_shape {
+ 	uint32_t type;
+ 	uint8_t  mode;
+-	uint8_t  pad0;
++	uint8_t  src_type;
+ 	uint16_t pad1;
+ };
+ 
+@@ -196,14 +196,15 @@ kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
+ 
+ #define VM_TYPE_DEFAULT			0
+ 
+-#define VM_SHAPE(__mode)			\
+-({						\
+-	struct vm_shape shape = {		\
+-		.mode = (__mode),		\
+-		.type = VM_TYPE_DEFAULT		\
+-	};					\
+-						\
+-	shape;					\
++#define VM_SHAPE(__mode)				\
++({							\
++	struct vm_shape shape = {			\
++		.mode	  = (__mode),			\
++		.type	  = VM_TYPE_DEFAULT,		\
++		.src_type = VM_MEM_SRC_ANONYMOUS	\
++	};						\
++							\
++	shape;						\
+ })
+ 
+ #if defined(__aarch64__)
+diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
+index a81089f7c83f..3a22794bd959 100644
+--- a/tools/testing/selftests/kvm/lib/kvm_util.c
++++ b/tools/testing/selftests/kvm/lib/kvm_util.c
+@@ -495,7 +495,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
+ 	if (is_guest_memfd_required(shape))
+ 		flags |= KVM_MEM_GUEST_MEMFD;
+ 
+-	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags);
++	vm_userspace_mem_region_add(vm, shape.src_type, 0, 0, nr_pages, flags);
+ 	for (i = 0; i < NR_MEM_REGIONS; i++)
+ 		vm->memslots[i] = 0;
+ 
+diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c
+index c3a9838f4806..d920880e4fc0 100644
+--- a/tools/testing/selftests/kvm/lib/x86/sev.c
++++ b/tools/testing/selftests/kvm/lib/x86/sev.c
+@@ -164,6 +164,7 @@ struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
+ 	struct vm_shape shape = {
+ 		.mode = VM_MODE_DEFAULT,
+ 		.type = type,
++		.src_type = VM_MEM_SRC_ANONYMOUS,
+ 	};
+ 	struct kvm_vm *vm;
+ 	struct kvm_vcpu *cpus[1];
+diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c
+index 0350a8896a2f..d403f8d2f26f 100644
+--- a/tools/testing/selftests/kvm/pre_fault_memory_test.c
++++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c
+@@ -68,6 +68,7 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private)
+ 	const struct vm_shape shape = {
+ 		.mode = VM_MODE_DEFAULT,
+ 		.type = vm_type,
++		.src_type = VM_MEM_SRC_ANONYMOUS,
+ 	};
+ 	struct kvm_vcpu *vcpu;
+ 	struct kvm_run *run;
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0009-KVM-selftests-cover-GUEST_MEMFD_FLAG_NO_DIRECT_MAP-i.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0009-KVM-selftests-cover-GUEST_MEMFD_FLAG_NO_DIRECT_MAP-i.patch
new file mode 100644
index 00000000000..6aa997ec841
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0009-KVM-selftests-cover-GUEST_MEMFD_FLAG_NO_DIRECT_MAP-i.patch
@@ -0,0 +1,64 @@
+From f50caa83e9d90c71bc473e9e0ac0eef205ca62b9 Mon Sep 17 00:00:00 2001
+From: Patrick Roy <roypat@amazon.co.uk>
+Date: Thu, 24 Oct 2024 07:18:57 +0100
+Subject: [PATCH 09/10] KVM: selftests: cover GUEST_MEMFD_FLAG_NO_DIRECT_MAP in
+ existing selftests
+
+Extend mem conversion selftests to cover the scenario that the guest can
+fault in and write gmem-backed guest memory even if its direct map
+removed. Also cover the new flag in guest_memfd_test.c tests.
+
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+---
+ tools/testing/selftests/kvm/guest_memfd_test.c             | 2 ++
+ .../selftests/kvm/x86/private_mem_conversions_test.c       | 7 ++++---
+ 2 files changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
+index b3ca6737f304..1187438b6831 100644
+--- a/tools/testing/selftests/kvm/guest_memfd_test.c
++++ b/tools/testing/selftests/kvm/guest_memfd_test.c
+@@ -275,6 +275,8 @@ static void test_guest_memfd(unsigned long vm_type)
+ 
+ 	if (vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP))
+ 		flags |= GUEST_MEMFD_FLAG_MMAP;
++	if (vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_NO_DIRECT_MAP))
++		flags |= GUEST_MEMFD_FLAG_NO_DIRECT_MAP;
+ 
+ 	test_create_guest_memfd_multiple(vm);
+ 	test_create_guest_memfd_invalid_sizes(vm, flags, page_size);
+diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
+index 82a8d88b5338..8427d9fbdb23 100644
+--- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
++++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
+@@ -367,7 +367,7 @@ static void *__test_mem_conversions(void *__vcpu)
+ }
+ 
+ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
+-				 uint32_t nr_memslots)
++				 uint32_t nr_memslots, uint64_t gmem_flags)
+ {
+ 	/*
+ 	 * Allocate enough memory so that each vCPU's chunk of memory can be
+@@ -394,7 +394,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
+ 
+ 	vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
+ 
+-	memfd = vm_create_guest_memfd(vm, memfd_size, 0);
++	memfd = vm_create_guest_memfd(vm, memfd_size, gmem_flags);
+ 
+ 	for (i = 0; i < nr_memslots; i++)
+ 		vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
+@@ -477,7 +477,8 @@ int main(int argc, char *argv[])
+ 		}
+ 	}
+ 
+-	test_mem_conversions(src_type, nr_vcpus, nr_memslots);
++	test_mem_conversions(src_type, nr_vcpus, nr_memslots, 0);
++	test_mem_conversions(src_type, nr_vcpus, nr_memslots, GUEST_MEMFD_FLAG_NO_DIRECT_MAP);
+ 
+ 	return 0;
+ }
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/10-direct-map-removal/0010-KVM-selftests-Test-guest-execution-from-direct-map-r.patch b/resources/hiding_ci/linux_patches/10-direct-map-removal/0010-KVM-selftests-Test-guest-execution-from-direct-map-r.patch
new file mode 100644
index 00000000000..a7326d67e2f
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/10-direct-map-removal/0010-KVM-selftests-Test-guest-execution-from-direct-map-r.patch
@@ -0,0 +1,91 @@
+From 5a633437724f636327a58eef48b1ef0595108b37 Mon Sep 17 00:00:00 2001
+From: Patrick Roy <roypat@amazon.co.uk>
+Date: Fri, 21 Feb 2025 08:18:24 +0000
+Subject: [PATCH 10/10] KVM: selftests: Test guest execution from direct map
+ removed gmem
+
+Add a selftest that loads itself into guest_memfd (via
+GUEST_MEMFD_FLAG_MMAP) and triggers an MMIO exit when executed. This
+exercises x86 MMIO emulation code inside KVM for guest_memfd-backed
+memslots where the guest_memfd folios are direct map removed.
+Particularly, it validates that x86 MMIO emulation code (guest page
+table walks + instruction fetch) correctly accesses gmem through the VMA
+that's been reflected into the memslot's userspace_addr field (instead
+of trying to do direct map accesses).
+
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+---
+ .../selftests/kvm/set_memory_region_test.c    | 50 +++++++++++++++++--
+ 1 file changed, 46 insertions(+), 4 deletions(-)
+
+diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
+index ce3ac0fd6dfb..cb3bc642d376 100644
+--- a/tools/testing/selftests/kvm/set_memory_region_test.c
++++ b/tools/testing/selftests/kvm/set_memory_region_test.c
+@@ -603,6 +603,41 @@ static void test_mmio_during_vectoring(void)
+ 
+ 	kvm_vm_free(vm);
+ }
++
++static void guest_code_trigger_mmio(void)
++{
++	/*
++	 * Read some GPA that is not backed by a memslot. KVM consider this
++	 * as MMIO and tell userspace to emulate the read.
++	 */
++	READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
++
++	GUEST_DONE();
++}
++
++static void test_guest_memfd_mmio(void)
++{
++	struct kvm_vm *vm;
++	struct kvm_vcpu *vcpu;
++	struct vm_shape shape = {
++		.mode = VM_MODE_DEFAULT,
++		.src_type = VM_MEM_SRC_GUEST_MEMFD_NO_DIRECT_MAP,
++	};
++	pthread_t vcpu_thread;
++
++	pr_info("Testing MMIO emulation for instructions in gmem\n");
++
++	vm = __vm_create_shape_with_one_vcpu(shape, &vcpu, 0, guest_code_trigger_mmio);
++
++	virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 1);
++
++	pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
++
++	/* If the MMIO read was successfully emulated, the vcpu thread will exit */
++	pthread_join(vcpu_thread, NULL);
++
++	kvm_vm_free(vm);
++}
+ #endif
+ 
+ int main(int argc, char *argv[])
+@@ -626,10 +661,17 @@ int main(int argc, char *argv[])
+ 	test_add_max_memory_regions();
+ 
+ #ifdef __x86_64__
+-	if (kvm_has_cap(KVM_CAP_GUEST_MEMFD) &&
+-	    (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))) {
+-		test_add_private_memory_region();
+-		test_add_overlapping_private_memory_regions();
++	if (kvm_has_cap(KVM_CAP_GUEST_MEMFD)) {
++		if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) {
++			test_add_private_memory_region();
++			test_add_overlapping_private_memory_regions();
++		}
++
++		if (kvm_has_cap(KVM_CAP_GUEST_MEMFD_MMAP) &&
++			kvm_has_cap(KVM_CAP_GUEST_MEMFD_NO_DIRECT_MAP))
++			test_guest_memfd_mmio();
++		else
++			pr_info("Skipping tests requiring KVM_CAP_GUEST_MEMFD_MMAP | KVM_CAP_GUEST_MEMFD_NO_DIRECT_MAP");
+ 	} else {
+ 		pr_info("Skipping tests for KVM_MEM_GUEST_MEMFD memory regions\n");
+ 	}
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/11-kvm-clock/0001-KVM-x86-use-uhva-for-kvm-clock-if-kvm_gpc_refresh-fa.patch b/resources/hiding_ci/linux_patches/11-kvm-clock/0001-KVM-x86-use-uhva-for-kvm-clock-if-kvm_gpc_refresh-fa.patch
new file mode 100644
index 00000000000..755f1c0c73c
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/11-kvm-clock/0001-KVM-x86-use-uhva-for-kvm-clock-if-kvm_gpc_refresh-fa.patch
@@ -0,0 +1,103 @@
+From 0a04094c8b7e292fcb7bdf8528d70baddbfff379 Mon Sep 17 00:00:00 2001
+From: Patrick Roy <roypat@amazon.co.uk>
+Date: Fri, 18 Jul 2025 15:59:39 +0100
+Subject: [PATCH 01/15] KVM: x86: use uhva for kvm-clock if kvm_gpc_refresh()
+ fails
+
+kvm-clock uses a gfn_to_pfn_cache to avoid repeated gpa->pfn
+computations, relying on mmu notifiers to determine when the translation
+needs to be redone.
+
+If the guest places the kvm-clock for some vcpu into memory that is
+backed by a KVM_MEMSLOT_GMEM_ONLY memslot, and the guest_memfd instance
+has GUEST_MEMFD_FLAG_NO_DIRECT_MAP set, this does not work:
+gfn_to_pfn_cache internally uses GUP to resolve uhva->pfn, which
+returned -EFAULT for direct map removed memory. But even if this pfn
+computation were to work, the subsequent attempts to access guest memory
+through the direct map would obviously fail.
+
+For this scenario, all other parts of kvm fall back to instead accessing
+guest memory through userspace mapping of guest_memfd, which is stored
+in the memslots userspace_addr. Have kvm-clock do the same by handling
+failures in kvm_gpc_refresh() with a fallback to a pvclock update
+routine that operates on userspace mappings. This looses the
+optimization of gfn_to_pfn_cache for these VMs, but on modern hardawre
+kvm-clock update requests should be rare enough for this to not matter
+(and guest_memfd is not support for Xen VMs, where speed of pvclock
+accesses is more relevant).
+
+Alternatively, it would be possible to team gfn_to_pfn_cache about
+(direct map removed) guest_memfd, however the combination of on-demand
+direct map reinsertion (and its induced ref-counting) and hooking
+gfn_to_pfn_caches up to gmem invalidations has proven significantly more
+complex [1], and hence simply falling back to userspace mappings was
+suggested by Sean at one of the guest_memfd upstream calls.
+
+[1]: https://lore.kernel.org/kvm/20240910163038.1298452-9-roypat@amazon.co.uk/
+     https://lore.kernel.org/kvm/20240910163038.1298452-10-roypat@amazon.co.uk/
+
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+---
+ arch/x86/kvm/x86.c | 38 +++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 37 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 33fba801b205..c8fd35c1bbda 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3149,6 +3149,40 @@ u64 get_kvmclock_ns(struct kvm *kvm)
+ 	return data.clock;
+ }
+ 
++static void kvm_setup_guest_pvclock_slow(struct pvclock_vcpu_time_info *ref_hv_clock,
++					 struct kvm_vcpu *vcpu,
++					 gpa_t gpa)
++{
++	struct pvclock_vcpu_time_info guest_hv_clock;
++	struct pvclock_vcpu_time_info hv_clock;
++
++	memcpy(&hv_clock, ref_hv_clock, sizeof(hv_clock));
++
++	kvm_read_guest(vcpu->kvm, gpa, &guest_hv_clock, sizeof(struct pvclock_vcpu_time_info));
++
++	/*
++	 * This VCPU is paused, but it's legal for a guest to read another
++	 * VCPU's kvmclock, so we really have to follow the specification where
++	 * it says that version is odd if data is being modified, and even after
++	 * it is consistent.
++	 */
++
++	guest_hv_clock.version = hv_clock.version = (guest_hv_clock.version + 1) | 1;
++	smp_wmb();
++
++	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
++	hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
++
++	kvm_write_guest(vcpu->kvm, gpa, &hv_clock, sizeof(struct pvclock_vcpu_time_info));
++
++	smp_wmb();
++
++	++hv_clock.version;
++	kvm_write_guest(vcpu->kvm, gpa + offsetof(struct pvclock_vcpu_time_info, version), &hv_clock.version, sizeof(hv_clock.version));
++
++	trace_kvm_pvclock_update(vcpu->vcpu_id, &hv_clock);
++}
++
+ static void kvm_setup_guest_pvclock(struct pvclock_vcpu_time_info *ref_hv_clock,
+ 				    struct kvm_vcpu *vcpu,
+ 				    struct gfn_to_pfn_cache *gpc,
+@@ -3164,8 +3198,10 @@ static void kvm_setup_guest_pvclock(struct pvclock_vcpu_time_info *ref_hv_clock,
+ 	while (!kvm_gpc_check(gpc, offset + sizeof(*guest_hv_clock))) {
+ 		read_unlock_irqrestore(&gpc->lock, flags);
+ 
+-		if (kvm_gpc_refresh(gpc, offset + sizeof(*guest_hv_clock)))
++		if (kvm_gpc_refresh(gpc, offset + sizeof(*guest_hv_clock))) {
++			kvm_setup_guest_pvclock_slow(ref_hv_clock, vcpu, gpc->gpa + offset);
+ 			return;
++		}
+ 
+ 		read_lock_irqsave(&gpc->lock, flags);
+ 	}
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0002-KVM-Add-KVM_MEM_USERFAULT-memslot-flag-and-bitmap.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0002-KVM-Add-KVM_MEM_USERFAULT-memslot-flag-and-bitmap.patch
new file mode 100644
index 00000000000..edf486dcbb1
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0002-KVM-Add-KVM_MEM_USERFAULT-memslot-flag-and-bitmap.patch
@@ -0,0 +1,158 @@
+From b987ad3e2757479b136abe917bde7ab0030810a2 Mon Sep 17 00:00:00 2001
+From: James Houghton <jthoughton@google.com>
+Date: Thu, 9 Jan 2025 20:49:17 +0000
+Subject: [PATCH 02/15] KVM: Add KVM_MEM_USERFAULT memslot flag and bitmap
+
+Use one of the 14 reserved u64s in struct kvm_userspace_memory_region2
+for the user to provide `userfault_bitmap`.
+
+The memslot flag indicates if KVM should be reading from the
+`userfault_bitmap` field from the memslot. The user is permitted to
+provide a bogus pointer. If the pointer cannot be read from, we will
+return -EFAULT (with no other information) back to the user.
+
+Signed-off-by: James Houghton <jthoughton@google.com>
+---
+ include/linux/kvm_host.h | 14 ++++++++++++++
+ include/uapi/linux/kvm.h |  4 +++-
+ virt/kvm/Kconfig         |  3 +++
+ virt/kvm/kvm_main.c      | 35 +++++++++++++++++++++++++++++++++++
+ 4 files changed, 55 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index a9468bce55f2..7911e7648dec 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -600,6 +600,7 @@ struct kvm_memory_slot {
+ 	unsigned long *dirty_bitmap;
+ 	struct kvm_arch_memory_slot arch;
+ 	unsigned long userspace_addr;
++	unsigned long __user *userfault_bitmap;
+ 	u32 flags;
+ 	short id;
+ 	u16 as_id;
+@@ -745,6 +746,11 @@ static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
+ }
+ #endif
+ 
++static inline bool kvm_has_userfault(struct kvm *kvm)
++{
++	return IS_ENABLED(CONFIG_HAVE_KVM_USERFAULT);
++}
++
+ struct kvm_memslots {
+ 	u64 generation;
+ 	atomic_long_t last_used_slot;
+@@ -2597,4 +2603,12 @@ static inline int kvm_enable_virtualization(void) { return 0; }
+ static inline void kvm_disable_virtualization(void) { }
+ #endif
+ 
++int kvm_gfn_userfault(struct kvm *kvm, struct kvm_memory_slot *memslot,
++		      gfn_t gfn);
++
++static inline bool kvm_memslot_userfault(struct kvm_memory_slot *memslot)
++{
++	return memslot->flags & KVM_MEM_USERFAULT;
++}
++
+ #endif
+diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
+index 33c8e8946019..641622739a71 100644
+--- a/include/uapi/linux/kvm.h
++++ b/include/uapi/linux/kvm.h
+@@ -40,7 +40,8 @@ struct kvm_userspace_memory_region2 {
+ 	__u64 guest_memfd_offset;
+ 	__u32 guest_memfd;
+ 	__u32 pad1;
+-	__u64 pad2[14];
++	__u64 userfault_bitmap;
++	__u64 pad2[13];
+ };
+ 
+ /*
+@@ -51,6 +52,7 @@ struct kvm_userspace_memory_region2 {
+ #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
+ #define KVM_MEM_READONLY	(1UL << 1)
+ #define KVM_MEM_GUEST_MEMFD	(1UL << 2)
++#define KVM_MEM_USERFAULT	(1UL << 3)
+ 
+ /* for KVM_IRQ_LINE */
+ struct kvm_irq_level {
+diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
+index 1b7d5be0b6c4..1ba90f2af313 100644
+--- a/virt/kvm/Kconfig
++++ b/virt/kvm/Kconfig
+@@ -127,3 +127,6 @@ config HAVE_KVM_ARCH_GMEM_INVALIDATE
+ config HAVE_KVM_ARCH_GMEM_POPULATE
+        bool
+        depends on KVM_GUEST_MEMFD
++
++config HAVE_KVM_USERFAULT
++       bool
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 6133bab21ab8..6ab616527cf7 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1605,6 +1605,9 @@ static int check_memory_region_flags(struct kvm *kvm,
+ 	    !(mem->flags & KVM_MEM_GUEST_MEMFD))
+ 		valid_flags |= KVM_MEM_READONLY;
+ 
++	if (kvm_has_userfault(kvm))
++		valid_flags |= KVM_MEM_USERFAULT;
++
+ 	if (mem->flags & ~valid_flags)
+ 		return -EINVAL;
+ 
+@@ -2040,6 +2043,12 @@ static int kvm_set_memory_region(struct kvm *kvm,
+ 	if (id < KVM_USER_MEM_SLOTS &&
+ 	    (mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
+ 		return -EINVAL;
++	if (mem->flags & KVM_MEM_USERFAULT &&
++	    ((mem->userfault_bitmap != untagged_addr(mem->userfault_bitmap)) ||
++	     !access_ok((void __user *)(unsigned long)mem->userfault_bitmap,
++			DIV_ROUND_UP(mem->memory_size >> PAGE_SHIFT, BITS_PER_LONG)
++			 * sizeof(long))))
++		return -EINVAL;
+ 
+ 	slots = __kvm_memslots(kvm, as_id);
+ 
+@@ -2108,6 +2117,9 @@ static int kvm_set_memory_region(struct kvm *kvm,
+ 		if (r)
+ 			goto out;
+ 	}
++	if (mem->flags & KVM_MEM_USERFAULT)
++		new->userfault_bitmap =
++		  (unsigned long __user *)(unsigned long)mem->userfault_bitmap;
+ 
+ 	r = kvm_set_memslot(kvm, old, new, change);
+ 	if (r)
+@@ -6551,3 +6563,26 @@ void kvm_exit(void)
+ 	kvm_irqfd_exit();
+ }
+ EXPORT_SYMBOL_GPL(kvm_exit);
++
++int kvm_gfn_userfault(struct kvm *kvm, struct kvm_memory_slot *memslot,
++		       gfn_t gfn)
++{
++	unsigned long bitmap_chunk = 0;
++	off_t offset;
++
++	if (!kvm_memslot_userfault(memslot))
++		return 0;
++
++	if (WARN_ON_ONCE(!memslot->userfault_bitmap))
++		return 0;
++
++	offset = gfn - memslot->base_gfn;
++
++	if (copy_from_user(&bitmap_chunk,
++			   memslot->userfault_bitmap + offset / BITS_PER_LONG,
++			   sizeof(bitmap_chunk)))
++		return -EFAULT;
++
++	/* Set in the bitmap means that the gfn is userfault */
++	return !!(bitmap_chunk & (1ul << (offset % BITS_PER_LONG)));
++}
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0003-KVM-Add-KVM_MEMORY_EXIT_FLAG_USERFAULT.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0003-KVM-Add-KVM_MEMORY_EXIT_FLAG_USERFAULT.patch
new file mode 100644
index 00000000000..cc40e3fd2c2
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0003-KVM-Add-KVM_MEMORY_EXIT_FLAG_USERFAULT.patch
@@ -0,0 +1,28 @@
+From 91e24dd59bbdbae73fe1f2a2fc667b7dfdf4419c Mon Sep 17 00:00:00 2001
+From: James Houghton <jthoughton@google.com>
+Date: Thu, 9 Jan 2025 20:49:18 +0000
+Subject: [PATCH 03/15] KVM: Add KVM_MEMORY_EXIT_FLAG_USERFAULT
+
+This flag is used for vCPU memory faults caused by KVM Userfault; i.e.,
+the bit in `userfault_bitmap` corresponding to the faulting gfn was set.
+
+Signed-off-by: James Houghton <jthoughton@google.com>
+---
+ include/uapi/linux/kvm.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
+index 641622739a71..5757a8c9b23b 100644
+--- a/include/uapi/linux/kvm.h
++++ b/include/uapi/linux/kvm.h
+@@ -446,6 +446,7 @@ struct kvm_run {
+ 		/* KVM_EXIT_MEMORY_FAULT */
+ 		struct {
+ #define KVM_MEMORY_EXIT_FLAG_PRIVATE	(1ULL << 3)
++#define KVM_MEMORY_EXIT_FLAG_USERFAULT	(1ULL << 4)
+ 			__u64 flags;
+ 			__u64 gpa;
+ 			__u64 size;
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0004-KVM-Allow-late-setting-of-KVM_MEM_USERFAULT-on-guest.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0004-KVM-Allow-late-setting-of-KVM_MEM_USERFAULT-on-guest.patch
new file mode 100644
index 00000000000..1e6b4974270
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0004-KVM-Allow-late-setting-of-KVM_MEM_USERFAULT-on-guest.patch
@@ -0,0 +1,58 @@
+From 9375ae487ca8c7bbb3dbc57760915d742eecbf37 Mon Sep 17 00:00:00 2001
+From: James Houghton <jthoughton@google.com>
+Date: Thu, 9 Jan 2025 20:49:19 +0000
+Subject: [PATCH 04/15] KVM: Allow late setting of KVM_MEM_USERFAULT on
+ guest_memfd memslot
+
+Currently guest_memfd memslots can only be deleted. Slightly change the
+logic to allow KVM_MR_FLAGS_ONLY changes when the only flag being
+changed is KVM_MEM_USERFAULT.
+
+Signed-off-by: James Houghton <jthoughton@google.com>
+---
+ virt/kvm/kvm_main.c | 15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 6ab616527cf7..f43a8f40b94b 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -2081,9 +2081,6 @@ static int kvm_set_memory_region(struct kvm *kvm,
+ 		if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages)
+ 			return -EINVAL;
+ 	} else { /* Modify an existing slot. */
+-		/* Private memslots are immutable, they can only be deleted. */
+-		if (mem->flags & KVM_MEM_GUEST_MEMFD)
+-			return -EINVAL;
+ 		if ((mem->userspace_addr != old->userspace_addr) ||
+ 		    (npages != old->npages) ||
+ 		    ((mem->flags ^ old->flags) & KVM_MEM_READONLY))
+@@ -2097,6 +2094,16 @@ static int kvm_set_memory_region(struct kvm *kvm,
+ 			return 0;
+ 	}
+ 
++	/*
++	 * Except for being able to set KVM_MEM_USERFAULT, private memslots are
++	 * immutable, they can only be deleted.
++	 */
++	if (mem->flags & KVM_MEM_GUEST_MEMFD &&
++	    !(change == KVM_MR_CREATE ||
++	      (change == KVM_MR_FLAGS_ONLY &&
++	       (mem->flags ^ old->flags) == KVM_MEM_USERFAULT)))
++		return -EINVAL;
++
+ 	if ((change == KVM_MR_CREATE || change == KVM_MR_MOVE) &&
+ 	    kvm_check_memslot_overlap(slots, id, base_gfn, base_gfn + npages))
+ 		return -EEXIST;
+@@ -2112,7 +2119,7 @@ static int kvm_set_memory_region(struct kvm *kvm,
+ 	new->npages = npages;
+ 	new->flags = mem->flags;
+ 	new->userspace_addr = mem->userspace_addr;
+-	if (mem->flags & KVM_MEM_GUEST_MEMFD) {
++	if (mem->flags & KVM_MEM_GUEST_MEMFD && change == KVM_MR_CREATE) {
+ 		r = kvm_gmem_bind(kvm, new, mem->guest_memfd, mem->guest_memfd_offset);
+ 		if (r)
+ 			goto out;
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0005-KVM-x86-mmu-Add-support-for-KVM_MEM_USERFAULT.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0005-KVM-x86-mmu-Add-support-for-KVM_MEM_USERFAULT.patch
new file mode 100644
index 00000000000..d56d5ba5127
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0005-KVM-x86-mmu-Add-support-for-KVM_MEM_USERFAULT.patch
@@ -0,0 +1,209 @@
+From ee100703450a5cdf0e23330699f023b4f599c9c2 Mon Sep 17 00:00:00 2001
+From: James Houghton <jthoughton@google.com>
+Date: Thu, 9 Jan 2025 20:49:21 +0000
+Subject: [PATCH 05/15] KVM: x86/mmu: Add support for KVM_MEM_USERFAULT
+
+Adhering to the requirements of KVM Userfault:
+
+1. Zap all sptes for the memslot when KVM_MEM_USERFAULT is toggled on
+   with kvm_arch_flush_shadow_memslot().
+2. Only all PAGE_SIZE sptes when KVM_MEM_USERFAULT is enabled (for both
+   normal/GUP memory and guest_memfd memory).
+3. Reconstruct huge mappings when KVM_MEM_USERFAULT is toggled off with
+   kvm_mmu_recover_huge_pages(). This is the behavior when dirty logging
+   is disabled; remain consistent with it.
+
+With the new logic in kvm_mmu_slot_apply_flags(), I've simplified the
+two dirty-logging-toggle checks into one, and I have dropped the
+WARN_ON() that was there.
+
+Signed-off-by: James Houghton <jthoughton@google.com>
+---
+ arch/arm64/kvm/mmu.c            |  2 +-
+ arch/arm64/kvm/nested.c         |  2 +-
+ arch/x86/kvm/Kconfig            |  1 +
+ arch/x86/kvm/mmu/mmu.c          | 12 +++++++++++
+ arch/x86/kvm/mmu/mmu_internal.h | 20 +++++++++++++++---
+ arch/x86/kvm/x86.c              | 36 ++++++++++++++++++++++++---------
+ include/linux/kvm_host.h        |  5 ++++-
+ 7 files changed, 62 insertions(+), 16 deletions(-)
+
+diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
+index a36426ccd9b5..6af2702cc2b1 100644
+--- a/arch/arm64/kvm/mmu.c
++++ b/arch/arm64/kvm/mmu.c
+@@ -1558,7 +1558,7 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ 	ret = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, &page, NULL);
+ 	if (ret) {
+ 		kvm_prepare_memory_fault_exit(vcpu, fault_ipa, PAGE_SIZE,
+-					      write_fault, exec_fault, false);
++					      write_fault, exec_fault, false, false);
+ 		return ret;
+ 	}
+ 
+diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
+index 27ebcae35299..18d493f96259 100644
+--- a/arch/arm64/kvm/nested.c
++++ b/arch/arm64/kvm/nested.c
+@@ -1231,7 +1231,7 @@ static int kvm_translate_vncr(struct kvm_vcpu *vcpu, bool *is_gmem)
+ 		ret = kvm_gmem_get_pfn(vcpu->kvm, memslot, gfn, &pfn, &page, NULL);
+ 		if (ret) {
+ 			kvm_prepare_memory_fault_exit(vcpu, vt->wr.pa, PAGE_SIZE,
+-					      write_fault, false, false);
++					      write_fault, false, false, false);
+ 			return ret;
+ 		}
+ 	}
+diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
+index 4e43923656d0..1390ba799d4f 100644
+--- a/arch/x86/kvm/Kconfig
++++ b/arch/x86/kvm/Kconfig
+@@ -48,6 +48,7 @@ config KVM_X86
+ 	select KVM_GENERIC_PRE_FAULT_MEMORY
+ 	select KVM_WERROR if WERROR
+ 	select KVM_GUEST_MEMFD if X86_64
++	select HAVE_KVM_USERFAULT
+ 
+ config KVM
+ 	tristate "Kernel-based Virtual Machine (KVM) support"
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 56c80588efa0..ae0f244357a5 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -4588,6 +4588,18 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu,
+ 				 struct kvm_page_fault *fault)
+ {
+ 	unsigned int foll = fault->write ? FOLL_WRITE : 0;
++	int userfault;
++
++	userfault = kvm_gfn_userfault(vcpu->kvm, fault->slot, fault->gfn);
++	if (userfault < 0)
++		return userfault;
++	if (userfault) {
++		kvm_mmu_prepare_userfault_exit(vcpu, fault);
++		return -EFAULT;
++	}
++
++	if (kvm_memslot_userfault(fault->slot))
++		fault->max_level = PG_LEVEL_4K;
+ 
+ 	if (fault->is_private || kvm_memslot_is_gmem_only(fault->slot))
+ 		return kvm_mmu_faultin_pfn_gmem(vcpu, fault);
+diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
+index b776be783a2f..120ce9d340b4 100644
+--- a/arch/x86/kvm/mmu/mmu_internal.h
++++ b/arch/x86/kvm/mmu/mmu_internal.h
+@@ -339,12 +339,26 @@ enum {
+  */
+ static_assert(RET_PF_CONTINUE == 0);
+ 
+-static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
+-						     struct kvm_page_fault *fault)
++static inline void __kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
++						       struct kvm_page_fault *fault,
++						       bool is_userfault)
+ {
+ 	kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT,
+ 				      PAGE_SIZE, fault->write, fault->exec,
+-				      fault->is_private);
++				      fault->is_private,
++				      is_userfault);
++}
++
++static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
++						     struct kvm_page_fault *fault)
++{
++	__kvm_mmu_prepare_memory_fault_exit(vcpu, fault, false);
++}
++
++static inline void kvm_mmu_prepare_userfault_exit(struct kvm_vcpu *vcpu,
++						  struct kvm_page_fault *fault)
++{
++	__kvm_mmu_prepare_memory_fault_exit(vcpu, fault, true);
+ }
+ 
+ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index c8fd35c1bbda..d9b58f555959 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -13094,12 +13094,36 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
+ 	u32 new_flags = new ? new->flags : 0;
+ 	bool log_dirty_pages = new_flags & KVM_MEM_LOG_DIRTY_PAGES;
+ 
++	/*
++	 * When toggling KVM Userfault on, zap all sptes so that userfault-ness
++	 * will be respected at refault time. All new faults will only install
++	 * small sptes. Therefore, when toggling it off, recover hugepages.
++	 *
++	 * For MOVE and DELETE, there will be nothing to do, as the old
++	 * mappings will have already been deleted by
++	 * kvm_arch_flush_shadow_memslot().
++	 *
++	 * For CREATE, no mappings will have been created yet.
++	 */
++	if ((old_flags ^ new_flags) & KVM_MEM_USERFAULT &&
++	    (change == KVM_MR_FLAGS_ONLY)) {
++		if (old_flags & KVM_MEM_USERFAULT)
++			kvm_mmu_recover_huge_pages(kvm, new);
++		else
++			kvm_arch_flush_shadow_memslot(kvm, old);
++	}
++
++	/*
++	 * Nothing more to do if dirty logging isn't being toggled.
++	 */
++	if (!((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES))
++		return;
++
+ 	/*
+ 	 * Update CPU dirty logging if dirty logging is being toggled.  This
+ 	 * applies to all operations.
+ 	 */
+-	if ((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES)
+-		kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
++	kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
+ 
+ 	/*
+ 	 * Nothing more to do for RO slots (which can't be dirtied and can't be
+@@ -13119,14 +13143,6 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
+ 	if ((change != KVM_MR_FLAGS_ONLY) || (new_flags & KVM_MEM_READONLY))
+ 		return;
+ 
+-	/*
+-	 * READONLY and non-flags changes were filtered out above, and the only
+-	 * other flag is LOG_DIRTY_PAGES, i.e. something is wrong if dirty
+-	 * logging isn't being toggled on or off.
+-	 */
+-	if (WARN_ON_ONCE(!((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES)))
+-		return;
+-
+ 	if (!log_dirty_pages) {
+ 		/*
+ 		 * Recover huge page mappings in the slot now that dirty logging
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index 7911e7648dec..70e6a5210ceb 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -2492,7 +2492,8 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr)
+ static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
+ 						 gpa_t gpa, gpa_t size,
+ 						 bool is_write, bool is_exec,
+-						 bool is_private)
++						 bool is_private,
++						 bool is_userfault)
+ {
+ 	vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
+ 	vcpu->run->memory_fault.gpa = gpa;
+@@ -2502,6 +2503,8 @@ static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
+ 	vcpu->run->memory_fault.flags = 0;
+ 	if (is_private)
+ 		vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE;
++	if (is_userfault)
++		vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_USERFAULT;
+ }
+ 
+ static inline bool kvm_memslot_is_gmem_only(const struct kvm_memory_slot *slot)
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0006-KVM-Advertise-KVM_CAP_USERFAULT-in-KVM_CHECK_EXTENSI.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0006-KVM-Advertise-KVM_CAP_USERFAULT-in-KVM_CHECK_EXTENSI.patch
new file mode 100644
index 00000000000..c9e1dfe1b41
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0006-KVM-Advertise-KVM_CAP_USERFAULT-in-KVM_CHECK_EXTENSI.patch
@@ -0,0 +1,45 @@
+From 7d333f96fb00a6a4cac6ba6fb40acac58e5ccd10 Mon Sep 17 00:00:00 2001
+From: James Houghton <jthoughton@google.com>
+Date: Thu, 9 Jan 2025 20:49:20 +0000
+Subject: [PATCH 06/15] KVM: Advertise KVM_CAP_USERFAULT in KVM_CHECK_EXTENSION
+
+Advertise support for KVM_CAP_USERFAULT when kvm_has_userfault() returns
+true. Currently this is merely IS_ENABLED(CONFIG_HAVE_KVM_USERFAULT), so
+it is somewhat redundant.
+
+Signed-off-by: James Houghton <jthoughton@google.com>
+---
+ include/uapi/linux/kvm.h | 1 +
+ virt/kvm/kvm_main.c      | 4 ++++
+ 2 files changed, 5 insertions(+)
+
+diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
+index 5757a8c9b23b..82294131dac3 100644
+--- a/include/uapi/linux/kvm.h
++++ b/include/uapi/linux/kvm.h
+@@ -967,6 +967,7 @@ struct kvm_enable_cap {
+ #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
+ #define KVM_CAP_GUEST_MEMFD_MMAP 244
+ #define KVM_CAP_GUEST_MEMFD_NO_DIRECT_MAP 245
++#define KVM_CAP_USERFAULT 246
+ 
+ struct kvm_irq_routing_irqchip {
+ 	__u32 irqchip;
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index f43a8f40b94b..6a80825a24cd 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -4944,6 +4944,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
+ 		return 1;
+ 	case KVM_CAP_GUEST_MEMFD_MMAP:
+ 		return !kvm || kvm_arch_supports_gmem_mmap(kvm);
++#endif
++#ifdef CONFIG_HAVE_KVM_USERFAULT
++	case KVM_CAP_USERFAULT:
++		return kvm_has_userfault(kvm);
+ #endif
+ 	default:
+ 		break;
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0007-KVM-arm64-Add-support-for-KVM_MEM_USERFAULT.patch b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0007-KVM-arm64-Add-support-for-KVM_MEM_USERFAULT.patch
new file mode 100644
index 00000000000..2ce76e4d797
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/15-kvm-mem-userfault/0007-KVM-arm64-Add-support-for-KVM_MEM_USERFAULT.patch
@@ -0,0 +1,100 @@
+From 80a66be3cf8e2567b31eff9459c16005302a6f5d Mon Sep 17 00:00:00 2001
+From: James Houghton <jthoughton@google.com>
+Date: Thu, 9 Jan 2025 20:49:22 +0000
+Subject: [PATCH 07/15] KVM: arm64: Add support for KVM_MEM_USERFAULT
+
+Adhering to the requirements of KVM Userfault:
+1. When it is toggled on, zap the second stage with
+   kvm_arch_flush_shadow_memslot(). This is to respect userfault-ness.
+2. When KVM_MEM_USERFAULT is enabled, restrict new second-stage mappings
+   to be PAGE_SIZE, just like when dirty logging is enabled.
+
+Do not zap the second stage when KVM_MEM_USERFAULT is disabled to remain
+consistent with the behavior when dirty logging is disabled.
+
+Signed-off-by: James Houghton <jthoughton@google.com>
+---
+ arch/arm64/kvm/Kconfig |  1 +
+ arch/arm64/kvm/mmu.c   | 33 ++++++++++++++++++++++++++++++++-
+ 2 files changed, 33 insertions(+), 1 deletion(-)
+
+diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
+index bff62e75d681..c75d6bcd3dd8 100644
+--- a/arch/arm64/kvm/Kconfig
++++ b/arch/arm64/kvm/Kconfig
+@@ -38,6 +38,7 @@ menuconfig KVM
+ 	select SCHED_INFO
+ 	select GUEST_PERF_EVENTS if PERF_EVENTS
+ 	select KVM_GUEST_MEMFD
++	select HAVE_KVM_USERFAULT
+ 	help
+ 	  Support hosting virtualized guest machines.
+ 
+diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
+index 6af2702cc2b1..c4502c6457eb 100644
+--- a/arch/arm64/kvm/mmu.c
++++ b/arch/arm64/kvm/mmu.c
+@@ -1555,6 +1555,13 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ 	/* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */
+ 	smp_rmb();
+ 
++	if (kvm_gfn_userfault(kvm, memslot, gfn)) {
++		kvm_prepare_memory_fault_exit(vcpu, gfn << PAGE_SHIFT,
++					      PAGE_SIZE, write_fault,
++					      exec_fault, false, true);
++		return -EFAULT;
++	}
++
+ 	ret = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, &page, NULL);
+ 	if (ret) {
+ 		kvm_prepare_memory_fault_exit(vcpu, fault_ipa, PAGE_SIZE,
+@@ -1651,7 +1658,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ 		return -EFAULT;
+ 	}
+ 
+-	if (force_pte)
++	if (force_pte || kvm_memslot_userfault(memslot))
+ 		vma_shift = PAGE_SHIFT;
+ 	else
+ 		vma_shift = get_vma_page_shift(vma, hva);
+@@ -1742,6 +1749,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ 	mmu_seq = kvm->mmu_invalidate_seq;
+ 	mmap_read_unlock(current->mm);
+ 
++	if (kvm_gfn_userfault(kvm, memslot, gfn)) {
++		kvm_prepare_memory_fault_exit(vcpu, gfn << PAGE_SHIFT,
++					      PAGE_SIZE, write_fault,
++					      exec_fault, false, true);
++		return -EFAULT;
++	}
++
+ 	pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0,
+ 				&writable, &page);
+ 	if (pfn == KVM_PFN_ERR_HWPOISON) {
+@@ -2245,6 +2259,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
+ 				   enum kvm_mr_change change)
+ {
+ 	bool log_dirty_pages = new && new->flags & KVM_MEM_LOG_DIRTY_PAGES;
++	u32 new_flags = new ? new->flags : 0;
++	u32 changed_flags = (new_flags) ^ (old ? old->flags : 0);
++
++	/*
++	 * If KVM_MEM_USERFAULT has been enabled, drop all the stage-2 mappings
++	 * so that we can respect userfault-ness.
++	 */
++	if ((changed_flags & KVM_MEM_USERFAULT) &&
++	    (new_flags & KVM_MEM_USERFAULT) &&
++	    change == KVM_MR_FLAGS_ONLY)
++		kvm_arch_flush_shadow_memslot(kvm, old);
++
++	/*
++	 * Nothing left to do if not toggling dirty logging.
++	 */
++	if (!(changed_flags & KVM_MEM_LOG_DIRTY_PAGES))
++		return;
+ 
+ 	/*
+ 	 * At this point memslot has been committed and there is an
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/20-gmem-write/0008-KVM-guest_memfd-add-generic-population-via-write.patch b/resources/hiding_ci/linux_patches/20-gmem-write/0008-KVM-guest_memfd-add-generic-population-via-write.patch
new file mode 100644
index 00000000000..1f10b5fa10f
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/20-gmem-write/0008-KVM-guest_memfd-add-generic-population-via-write.patch
@@ -0,0 +1,122 @@
+From 6b2a80b84a714b429347f5ba3e2d5f0be2eb3b95 Mon Sep 17 00:00:00 2001
+From: Nikita Kalyazin <kalyazin@amazon.com>
+Date: Tue, 2 Sep 2025 11:20:03 +0000
+Subject: [PATCH 08/15] KVM: guest_memfd: add generic population via write
+
+write syscall populates guest_memfd with user-supplied data in a generic
+way, ie no vendor-specific preparation is performed.  This is supposed
+to be used in non-CoCo setups where guest memory is not
+hardware-encrypted.
+
+The following behaviour is implemented:
+ - only page-aligned count and offset are allowed
+ - if the memory is already allocated, the call will successfully
+   populate it
+ - if the memory is not allocated, the call will both allocate and
+   populate
+ - if the memory is already populated, the call will not repopulate it
+
+Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
+---
+ virt/kvm/guest_memfd.c | 64 +++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 63 insertions(+), 1 deletion(-)
+
+diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
+index 20217332dcd1..b77af4c48b9a 100644
+--- a/virt/kvm/guest_memfd.c
++++ b/virt/kvm/guest_memfd.c
+@@ -402,7 +402,9 @@ static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
+ }
+ 
+ static struct file_operations kvm_gmem_fops = {
+-	.mmap		= kvm_gmem_mmap,
++	.mmap           = kvm_gmem_mmap,
++	.llseek         = default_llseek,
++	.write_iter     = generic_perform_write,
+ 	.open		= generic_file_open,
+ 	.release	= kvm_gmem_release,
+ 	.fallocate	= kvm_gmem_fallocate,
+@@ -413,6 +415,63 @@ void kvm_gmem_init(struct module *module)
+ 	kvm_gmem_fops.owner = module;
+ }
+ 
++static int kvm_kmem_gmem_write_begin(const struct kiocb *kiocb,
++				     struct address_space *mapping,
++				     loff_t pos, unsigned int len,
++				     struct folio **foliop,
++				     void **fsdata)
++{
++	struct file *file = kiocb->ki_filp;
++	pgoff_t index = pos >> PAGE_SHIFT;
++	struct folio *folio;
++
++	if (!PAGE_ALIGNED(pos) || len != PAGE_SIZE)
++		return -EINVAL;
++
++	if (pos + len > i_size_read(file_inode(file)))
++		return -EINVAL;
++
++	folio = kvm_gmem_get_folio(file_inode(file), index);
++	if (IS_ERR(folio))
++		return -EFAULT;
++
++	if (WARN_ON_ONCE(folio_test_large(folio))) {
++		folio_unlock(folio);
++		folio_put(folio);
++		return -EFAULT;
++	}
++
++	if (folio_test_uptodate(folio)) {
++		folio_unlock(folio);
++		folio_put(folio);
++		return -ENOSPC;
++	}
++
++	*foliop = folio;
++	return 0;
++}
++
++static int kvm_kmem_gmem_write_end(const struct kiocb *kiocb,
++				   struct address_space *mapping,
++				   loff_t pos, unsigned int len,
++				   unsigned int copied,
++				   struct folio *folio, void *fsdata)
++{
++	if (copied) {
++		if (copied < len) {
++			unsigned int from = pos & (PAGE_SIZE - 1);
++
++			folio_zero_range(folio, from + copied, len - copied);
++		}
++		kvm_gmem_mark_prepared(folio);
++	}
++
++	folio_unlock(folio);
++	folio_put(folio);
++
++	return copied;
++}
++
+ static int kvm_gmem_migrate_folio(struct address_space *mapping,
+ 				  struct folio *dst, struct folio *src,
+ 				  enum migrate_mode mode)
+@@ -467,6 +526,8 @@ static void kvm_gmem_free_folio(struct address_space *mapping,
+ 
+ static const struct address_space_operations kvm_gmem_aops = {
+ 	.dirty_folio = noop_dirty_folio,
++	.write_begin = kvm_kmem_gmem_write_begin,
++	.write_end = kvm_kmem_gmem_write_end,
+ 	.migrate_folio	= kvm_gmem_migrate_folio,
+ 	.error_remove_folio = kvm_gmem_error_folio,
+ 	.free_folio = kvm_gmem_free_folio,
+@@ -512,6 +573,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
+ 	}
+ 
+ 	file->f_flags |= O_LARGEFILE;
++	file->f_mode |= FMODE_LSEEK | FMODE_PWRITE;
+ 
+ 	inode = file->f_inode;
+ 	WARN_ON(file->f_mapping != inode->i_mapping);
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/20-gmem-write/0009-KVM-selftests-update-guest_memfd-write-tests.patch b/resources/hiding_ci/linux_patches/20-gmem-write/0009-KVM-selftests-update-guest_memfd-write-tests.patch
new file mode 100644
index 00000000000..3da3a39f7b2
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/20-gmem-write/0009-KVM-selftests-update-guest_memfd-write-tests.patch
@@ -0,0 +1,127 @@
+From cd137bca2b0b33832613019e7af45549be8cd583 Mon Sep 17 00:00:00 2001
+From: Nikita Kalyazin <kalyazin@amazon.com>
+Date: Tue, 2 Sep 2025 11:20:15 +0000
+Subject: [PATCH 09/15] KVM: selftests: update guest_memfd write tests
+
+This is to reflect that the write syscall is now implemented for
+guest_memfd.
+
+Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
+---
+ .../testing/selftests/kvm/guest_memfd_test.c  | 86 +++++++++++++++++--
+ 1 file changed, 80 insertions(+), 6 deletions(-)
+
+diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
+index 1187438b6831..1f804af16689 100644
+--- a/tools/testing/selftests/kvm/guest_memfd_test.c
++++ b/tools/testing/selftests/kvm/guest_memfd_test.c
+@@ -24,18 +24,91 @@
+ #include "test_util.h"
+ #include "ucall_common.h"
+ 
+-static void test_file_read_write(int fd)
++static void test_file_read(int fd)
+ {
+ 	char buf[64];
+ 
+ 	TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0,
+ 		    "read on a guest_mem fd should fail");
+-	TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0,
+-		    "write on a guest_mem fd should fail");
+ 	TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0,
+ 		    "pread on a guest_mem fd should fail");
+-	TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0,
+-		    "pwrite on a guest_mem fd should fail");
++}
++
++static void test_file_write(int fd, size_t total_size)
++{
++	size_t page_size = getpagesize();
++	void *buf = NULL;
++	int ret;
++
++	ret = posix_memalign(&buf, page_size, total_size);
++	TEST_ASSERT_EQ(ret, 0);
++
++	/* Check arguments correctness checks work as expected */
++
++	ret = pwrite(fd, buf, page_size - 1, 0);
++	TEST_ASSERT(ret == -1, "write unaligned count on a guest_mem fd should fail");
++	TEST_ASSERT_EQ(errno, EINVAL);
++
++	ret = pwrite(fd, buf, page_size, 1);
++	TEST_ASSERT(ret == -1, "write unaligned offset on a guest_mem fd should fail");
++	TEST_ASSERT_EQ(errno, EINVAL);
++
++	ret = pwrite(fd, buf, page_size, total_size);
++	TEST_ASSERT(ret == -1, "writing past the file size on a guest_mem fd should fail");
++	TEST_ASSERT_EQ(errno, EINVAL);
++
++	ret = pwrite(fd, NULL, page_size, 0);
++	TEST_ASSERT(ret == -1, "supplying a NULL buffer when writing a guest_mem fd should fail");
++	TEST_ASSERT_EQ(errno, EFAULT);
++
++	/* Check double population is not allowed */
++
++	ret = pwrite(fd, buf, page_size, 0);
++	TEST_ASSERT(ret == page_size, "page-aligned write on a guest_mem fd should succeed");
++
++	ret = pwrite(fd, buf, page_size, 0);
++	TEST_ASSERT(ret == -1, "write on already populated guest_mem fd should fail");
++	TEST_ASSERT_EQ(errno, ENOSPC);
++
++	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
++	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
++
++	/* Check population is allowed again after punching a hole */
++
++	ret = pwrite(fd, buf, page_size, 0);
++	TEST_ASSERT(ret == page_size,
++		"page-aligned write on a punched guest_mem fd should succeed");
++
++	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
++	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
++
++	/* Check population of already allocated memory is allowed */
++
++	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, page_size);
++	TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed");
++
++	ret = pwrite(fd, buf, page_size, 0);
++	TEST_ASSERT(ret == page_size, "write on a preallocated guest_mem fd should succeed");
++
++	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
++	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
++
++	/* Check population works until an already populated page is encountered */
++
++	ret = pwrite(fd, buf, total_size, 0);
++	TEST_ASSERT(ret == total_size, "page-aligned write on a guest_mem fd should succeed");
++
++	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
++	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
++
++	ret = pwrite(fd, buf, total_size, 0);
++	TEST_ASSERT(ret == page_size, "write on a guest_mem fd should not overwrite data");
++
++	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, total_size);
++	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
++
++
++	free(buf);
+ }
+ 
+ static void test_mmap_supported(int fd, size_t page_size, size_t total_size)
+@@ -283,7 +356,8 @@ static void test_guest_memfd(unsigned long vm_type)
+ 
+ 	fd = vm_create_guest_memfd(vm, total_size, flags);
+ 
+-	test_file_read_write(fd);
++	test_file_read(fd);
++	test_file_write(fd, total_size);
+ 
+ 	if (flags & GUEST_MEMFD_FLAG_MMAP) {
+ 		test_mmap_supported(fd, page_size, total_size);
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/25-gmem-uffd/0010-mm-userfaultfd-generic-continue-for-non-hugetlbfs.patch b/resources/hiding_ci/linux_patches/25-gmem-uffd/0010-mm-userfaultfd-generic-continue-for-non-hugetlbfs.patch
new file mode 100644
index 00000000000..663a05956eb
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/25-gmem-uffd/0010-mm-userfaultfd-generic-continue-for-non-hugetlbfs.patch
@@ -0,0 +1,153 @@
+From 4a772023aa544182d6bb94a091aacf4f39b8dabd Mon Sep 17 00:00:00 2001
+From: Nikita Kalyazin <kalyazin@amazon.com>
+Date: Mon, 31 Mar 2025 10:15:35 +0000
+Subject: [PATCH 10/15] mm: userfaultfd: generic continue for non hugetlbfs
+
+Remove shmem-specific code from UFFDIO_CONTINUE implementation for
+non-huge pages by calling vm_ops->fault().  A new VMF flag,
+FAULT_FLAG_USERFAULT_CONTINUE, is introduced to avoid recursive call to
+handle_userfault().
+
+Suggested-by: James Houghton <jthoughton@google.com>
+Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
+---
+ include/linux/mm_types.h |  4 ++++
+ mm/hugetlb.c             |  2 +-
+ mm/shmem.c               |  9 ++++++---
+ mm/userfaultfd.c         | 37 +++++++++++++++++++++++++++----------
+ 4 files changed, 38 insertions(+), 14 deletions(-)
+
+diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
+index 08bc2442db93..06619c07b6d3 100644
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -1569,6 +1569,9 @@ enum tlb_flush_reason {
+  * @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached.
+  *                        We should only access orig_pte if this flag set.
+  * @FAULT_FLAG_VMA_LOCK: The fault is handled under VMA lock.
++ * @FAULT_FLAG_USERFAULT_CONTINUE: The fault handler must not call userfaultfd
++ *                                 minor handler as it is being called by the
++ *                                 userfaultfd code itself.
+  *
+  * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
+  * whether we would allow page faults to retry by specifying these two
+@@ -1607,6 +1610,7 @@ enum fault_flag {
+ 	FAULT_FLAG_UNSHARE =		1 << 10,
+ 	FAULT_FLAG_ORIG_PTE_VALID =	1 << 11,
+ 	FAULT_FLAG_VMA_LOCK =		1 << 12,
++	FAULT_FLAG_USERFAULT_CONTINUE = 1 << 13,
+ };
+ 
+ typedef unsigned int __bitwise zap_flags_t;
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 753f99b4c718..7efeb52f62b9 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -6531,7 +6531,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
+ 		}
+ 
+ 		/* Check for page in userfault range. */
+-		if (userfaultfd_minor(vma)) {
++		if (userfaultfd_minor(vma) && !(vmf->flags & FAULT_FLAG_USERFAULT_CONTINUE)) {
+ 			folio_unlock(folio);
+ 			folio_put(folio);
+ 			/* See comment in userfaultfd_missing() block above */
+diff --git a/mm/shmem.c b/mm/shmem.c
+index e2c76a30802b..5bea7a10e176 100644
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2519,7 +2519,8 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
+ 	fault_mm = vma ? vma->vm_mm : NULL;
+ 
+ 	folio = filemap_get_entry(inode->i_mapping, index);
+-	if (folio && vma && userfaultfd_minor(vma)) {
++	if (folio && vma && userfaultfd_minor(vma) &&
++	    !(vmf->flags & FAULT_FLAG_USERFAULT_CONTINUE)) {
+ 		if (!xa_is_value(folio))
+ 			folio_put(folio);
+ 		*fault_type = handle_userfault(vmf, VM_UFFD_MINOR);
+@@ -2779,6 +2780,8 @@ static vm_fault_t shmem_falloc_wait(struct vm_fault *vmf, struct inode *inode)
+ static vm_fault_t shmem_fault(struct vm_fault *vmf)
+ {
+ 	struct inode *inode = file_inode(vmf->vma->vm_file);
++	enum sgp_type sgp = vmf->flags & FAULT_FLAG_USERFAULT_CONTINUE ?
++	    SGP_NOALLOC : SGP_CACHE;
+ 	gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
+ 	struct folio *folio = NULL;
+ 	vm_fault_t ret = 0;
+@@ -2795,8 +2798,8 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
+ 	}
+ 
+ 	WARN_ON_ONCE(vmf->page != NULL);
+-	err = shmem_get_folio_gfp(inode, vmf->pgoff, 0, &folio, SGP_CACHE,
+-				  gfp, vmf, &ret);
++	err = shmem_get_folio_gfp(inode, vmf->pgoff, 0, &folio, sgp, gfp, vmf,
++				  &ret);
+ 	if (err)
+ 		return vmf_error(err);
+ 	if (folio) {
+diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
+index 45e6290e2e8b..c43e4c8893b7 100644
+--- a/mm/userfaultfd.c
++++ b/mm/userfaultfd.c
+@@ -376,30 +376,47 @@ static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
+ 	return ret;
+ }
+ 
+-/* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
++/* Handles UFFDIO_CONTINUE for all VMAs */
+ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
+ 				     struct vm_area_struct *dst_vma,
+ 				     unsigned long dst_addr,
+ 				     uffd_flags_t flags)
+ {
+-	struct inode *inode = file_inode(dst_vma->vm_file);
+-	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
+ 	struct folio *folio;
+ 	struct page *page;
+ 	int ret;
++	struct vm_fault vmf = {
++		.vma = dst_vma,
++		.address = dst_addr,
++		.flags = FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE |
++		    FAULT_FLAG_USERFAULT_CONTINUE,
++		.pte = NULL,
++		.page = NULL,
++		.pgoff = linear_page_index(dst_vma, dst_addr),
++	};
++
++	if (!dst_vma->vm_ops || !dst_vma->vm_ops->fault)
++		return -EINVAL;
+ 
+-	ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
+-	/* Our caller expects us to return -EFAULT if we failed to find folio */
+-	if (ret == -ENOENT)
++retry:
++	ret = dst_vma->vm_ops->fault(&vmf);
++	if (ret & VM_FAULT_ERROR) {
+ 		ret = -EFAULT;
+-	if (ret)
+ 		goto out;
+-	if (!folio) {
+-		ret = -EFAULT;
++	}
++
++	if (ret & VM_FAULT_NOPAGE) {
++		ret = -EAGAIN;
+ 		goto out;
+ 	}
+ 
+-	page = folio_file_page(folio, pgoff);
++	if (ret & VM_FAULT_RETRY)
++		goto retry;
++
++	page = vmf.page;
++	folio = page_folio(page);
++	BUG_ON(!folio);
++
+ 	if (PageHWPoison(page)) {
+ 		ret = -EIO;
+ 		goto out_release;
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/25-gmem-uffd/0011-mm-provide-can_userfault-vma-operation.patch b/resources/hiding_ci/linux_patches/25-gmem-uffd/0011-mm-provide-can_userfault-vma-operation.patch
new file mode 100644
index 00000000000..b31b7cd01af
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/25-gmem-uffd/0011-mm-provide-can_userfault-vma-operation.patch
@@ -0,0 +1,95 @@
+From c6b2b7c5a30d2c8aa0783b9c311fa7527878b6ed Mon Sep 17 00:00:00 2001
+From: Nikita Kalyazin <kalyazin@amazon.com>
+Date: Fri, 4 Apr 2025 14:15:18 +0000
+Subject: [PATCH 11/15] mm: provide can_userfault vma operation
+
+The new operation allows to decouple the userfaulfd code from
+dependencies to VMA types, specifically, shmem and hugetlb.  The
+vm_flags bitmap argument is processed with "any" logic, meaning if the
+VMA type supports any of the flags set, it returns true.  This is to
+avoid multiple calls when checking for __VM_UFFD_FLAGS.
+
+Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
+---
+ include/linux/mm.h | 5 +++++
+ mm/hugetlb.c       | 7 +++++++
+ mm/shmem.c         | 8 ++++++++
+ 3 files changed, 20 insertions(+)
+
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 1ae97a0b8ec7..e034281b8e00 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -655,6 +655,11 @@ struct vm_operations_struct {
+ 	 */
+ 	struct page *(*find_special_page)(struct vm_area_struct *vma,
+ 					  unsigned long addr);
++	/*
++	 * True if the VMA supports userfault at least for one of the vm_flags
++	 */
++	bool (*can_userfault)(struct vm_area_struct *vma,
++			      unsigned long vm_flags);
+ };
+ 
+ #ifdef CONFIG_NUMA_BALANCING
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 7efeb52f62b9..8d7afe97c104 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5446,6 +5446,12 @@ static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
+ 	return huge_page_size(hstate_vma(vma));
+ }
+ 
++static bool hugetlb_vm_op_can_userfault(struct vm_area_struct *vma,
++					unsigned long vm_flags)
++{
++	return true;
++}
++
+ /*
+  * We cannot handle pagefaults against hugetlb pages at all.  They cause
+  * handle_mm_fault() to try to instantiate regular-sized pages in the
+@@ -5471,6 +5477,7 @@ const struct vm_operations_struct hugetlb_vm_ops = {
+ 	.close = hugetlb_vm_op_close,
+ 	.may_split = hugetlb_vm_op_split,
+ 	.pagesize = hugetlb_vm_op_pagesize,
++	.can_userfault = hugetlb_vm_op_can_userfault,
+ };
+ 
+ static pte_t make_huge_pte(struct vm_area_struct *vma, struct folio *folio,
+diff --git a/mm/shmem.c b/mm/shmem.c
+index 5bea7a10e176..313c2388247d 100644
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2943,6 +2943,12 @@ static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
+ 	return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
+ }
+ 
++static bool shmem_can_userfault(struct vm_area_struct *vma,
++				unsigned long vm_flags)
++{
++	return true;
++}
++
+ static struct mempolicy *shmem_get_pgoff_policy(struct shmem_inode_info *info,
+ 			pgoff_t index, unsigned int order, pgoff_t *ilx)
+ {
+@@ -5359,6 +5365,7 @@ static const struct vm_operations_struct shmem_vm_ops = {
+ 	.set_policy     = shmem_set_policy,
+ 	.get_policy     = shmem_get_policy,
+ #endif
++	.can_userfault  = shmem_can_userfault,
+ };
+ 
+ static const struct vm_operations_struct shmem_anon_vm_ops = {
+@@ -5368,6 +5375,7 @@ static const struct vm_operations_struct shmem_anon_vm_ops = {
+ 	.set_policy     = shmem_set_policy,
+ 	.get_policy     = shmem_get_policy,
+ #endif
++	.can_userfault  = shmem_can_userfault,
+ };
+ 
+ int shmem_init_fs_context(struct fs_context *fc)
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/25-gmem-uffd/0012-mm-userfaultfd-use-can_userfault-vma-operation.patch b/resources/hiding_ci/linux_patches/25-gmem-uffd/0012-mm-userfaultfd-use-can_userfault-vma-operation.patch
new file mode 100644
index 00000000000..fdeb1a665a1
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/25-gmem-uffd/0012-mm-userfaultfd-use-can_userfault-vma-operation.patch
@@ -0,0 +1,79 @@
+From e9accab53147174d96494d30428f9deec7f078e2 Mon Sep 17 00:00:00 2001
+From: Nikita Kalyazin <kalyazin@amazon.com>
+Date: Fri, 4 Apr 2025 14:16:49 +0000
+Subject: [PATCH 12/15] mm: userfaultfd: use can_userfault vma operation
+
+Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
+---
+ include/linux/userfaultfd_k.h | 13 ++++++-------
+ mm/userfaultfd.c              | 10 +++++++---
+ 2 files changed, 13 insertions(+), 10 deletions(-)
+
+diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
+index c0e716aec26a..47d40cec69c7 100644
+--- a/include/linux/userfaultfd_k.h
++++ b/include/linux/userfaultfd_k.h
+@@ -217,8 +217,8 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
+ 	if (vma->vm_flags & VM_DROPPABLE)
+ 		return false;
+ 
+-	if ((vm_flags & VM_UFFD_MINOR) &&
+-	    (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
++	if (!vma->vm_ops->can_userfault ||
++	    !vma->vm_ops->can_userfault(vma, VM_UFFD_MINOR))
+ 		return false;
+ 
+ 	/*
+@@ -231,16 +231,15 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
+ #ifndef CONFIG_PTE_MARKER_UFFD_WP
+ 	/*
+ 	 * If user requested uffd-wp but not enabled pte markers for
+-	 * uffd-wp, then shmem & hugetlbfs are not supported but only
+-	 * anonymous.
++	 * uffd-wp, then only anonymous is supported.
+ 	 */
+ 	if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma))
+ 		return false;
+ #endif
+ 
+-	/* By default, allow any of anon|shmem|hugetlb */
+-	return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
+-	    vma_is_shmem(vma);
++	return vma_is_anonymous(vma) ||
++	    (vma->vm_ops->can_userfault &&
++	     vma->vm_ops->can_userfault(vma, vm_flags));
+ }
+ 
+ static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
+index c43e4c8893b7..daf3b93e4d22 100644
+--- a/mm/userfaultfd.c
++++ b/mm/userfaultfd.c
+@@ -724,6 +724,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
+ 	unsigned long src_addr, dst_addr;
+ 	long copied;
+ 	struct folio *folio;
++	bool can_userfault;
+ 
+ 	/*
+ 	 * Sanitize the command parameters:
+@@ -783,10 +784,13 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
+ 		return  mfill_atomic_hugetlb(ctx, dst_vma, dst_start,
+ 					     src_start, len, flags);
+ 
+-	if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
++	can_userfault = dst_vma->vm_ops->can_userfault &&
++	    dst_vma->vm_ops->can_userfault(dst_vma, __VM_UFFD_FLAGS);
++
++	if (!vma_is_anonymous(dst_vma) && !can_userfault)
+ 		goto out_unlock;
+-	if (!vma_is_shmem(dst_vma) &&
+-	    uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
++
++	if (!can_userfault && uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
+ 		goto out_unlock;
+ 
+ 	while (src_addr < src_start + len) {
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/25-gmem-uffd/0013-KVM-guest_memfd-add-support-for-userfaultfd-minor.patch b/resources/hiding_ci/linux_patches/25-gmem-uffd/0013-KVM-guest_memfd-add-support-for-userfaultfd-minor.patch
new file mode 100644
index 00000000000..05ec2b8943a
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/25-gmem-uffd/0013-KVM-guest_memfd-add-support-for-userfaultfd-minor.patch
@@ -0,0 +1,41 @@
+From ba67c9ca3e48c070d11741726c9c78d93d6c969d Mon Sep 17 00:00:00 2001
+From: Nikita Kalyazin <kalyazin@amazon.com>
+Date: Tue, 1 Apr 2025 15:02:56 +0000
+Subject: [PATCH 13/15] KVM: guest_memfd: add support for userfaultfd minor
+
+Add support for sending a pagefault event if userfaultfd is registered.
+Only page minor event is currently supported.
+
+Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
+---
+ virt/kvm/guest_memfd.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
+index b77af4c48b9a..41610d501a6f 100644
+--- a/virt/kvm/guest_memfd.c
++++ b/virt/kvm/guest_memfd.c
+@@ -5,6 +5,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/anon_inodes.h>
+ #include <linux/set_memory.h>
++#include <linux/userfaultfd_k.h>
+ 
+ #include "kvm_mm.h"
+ 
+@@ -371,6 +372,12 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
+ 		}
+ 	}
+ 
++	if (userfaultfd_minor(vmf->vma) &&
++	    !(vmf->flags & FAULT_FLAG_USERFAULT_CONTINUE)) {
++		folio_unlock(folio);
++		return handle_userfault(vmf, VM_UFFD_MINOR);
++	}
++
+ 	vmf->page = folio_file_page(folio, vmf->pgoff);
+ 
+ out_folio:
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/25-gmem-uffd/0014-mm-userfaultfd-add-UFFD_FEATURE_MINOR_GUEST_MEMFD.patch b/resources/hiding_ci/linux_patches/25-gmem-uffd/0014-mm-userfaultfd-add-UFFD_FEATURE_MINOR_GUEST_MEMFD.patch
new file mode 100644
index 00000000000..4a355191f8b
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/25-gmem-uffd/0014-mm-userfaultfd-add-UFFD_FEATURE_MINOR_GUEST_MEMFD.patch
@@ -0,0 +1,61 @@
+From 70d0f6bdd6e68530bc7e6a69988328801cbd161c Mon Sep 17 00:00:00 2001
+From: Nikita Kalyazin <kalyazin@amazon.com>
+Date: Fri, 4 Apr 2025 14:18:03 +0000
+Subject: [PATCH 14/15] mm: userfaultfd: add UFFD_FEATURE_MINOR_GUEST_MEMFD
+
+Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
+---
+ fs/userfaultfd.c                 | 3 ++-
+ include/uapi/linux/userfaultfd.h | 8 +++++++-
+ 2 files changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
+index 54c6cc7fe9c6..b3e26bccd8b9 100644
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -1978,7 +1978,8 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
+ 	uffdio_api.features = UFFD_API_FEATURES;
+ #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+ 	uffdio_api.features &=
+-		~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM);
++		~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM |
++		UFFD_FEATURE_MINOR_GUEST_MEMFD);
+ #endif
+ #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+ 	uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP;
+diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
+index 2841e4ea8f2c..ed688797eba7 100644
+--- a/include/uapi/linux/userfaultfd.h
++++ b/include/uapi/linux/userfaultfd.h
+@@ -42,7 +42,8 @@
+ 			   UFFD_FEATURE_WP_UNPOPULATED |	\
+ 			   UFFD_FEATURE_POISON |		\
+ 			   UFFD_FEATURE_WP_ASYNC |		\
+-			   UFFD_FEATURE_MOVE)
++			   UFFD_FEATURE_MOVE |			\
++			   UFFD_FEATURE_MINOR_GUEST_MEMFD)
+ #define UFFD_API_IOCTLS				\
+ 	((__u64)1 << _UFFDIO_REGISTER |		\
+ 	 (__u64)1 << _UFFDIO_UNREGISTER |	\
+@@ -230,6 +231,10 @@ struct uffdio_api {
+ 	 *
+ 	 * UFFD_FEATURE_MOVE indicates that the kernel supports moving an
+ 	 * existing page contents from userspace.
++	 *
++	 * UFFD_FEATURE_MINOR_GUEST_MEMFD indicates the same support as
++	 * UFFD_FEATURE_MINOR_HUGETLBFS, but for guest_memfd-backed pages
++	 * instead.
+ 	 */
+ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
+ #define UFFD_FEATURE_EVENT_FORK			(1<<1)
+@@ -248,6 +253,7 @@ struct uffdio_api {
+ #define UFFD_FEATURE_POISON			(1<<14)
+ #define UFFD_FEATURE_WP_ASYNC			(1<<15)
+ #define UFFD_FEATURE_MOVE			(1<<16)
++#define UFFD_FEATURE_MINOR_GUEST_MEMFD		(1<<17)
+ 	__u64 features;
+ 
+ 	__u64 ioctls;
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/25-gmem-uffd/0015-fixup-for-guest_memfd-uffd-v3.patch b/resources/hiding_ci/linux_patches/25-gmem-uffd/0015-fixup-for-guest_memfd-uffd-v3.patch
new file mode 100644
index 00000000000..cad7d7b3e6f
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/25-gmem-uffd/0015-fixup-for-guest_memfd-uffd-v3.patch
@@ -0,0 +1,71 @@
+From 3c48c32e0ed1b2bf97fc560fc91f2e62fd700e89 Mon Sep 17 00:00:00 2001
+From: Nikita Kalyazin <kalyazin@amazon.com>
+Date: Thu, 10 Apr 2025 14:18:53 +0000
+Subject: [PATCH 15/15] fixup for guest_memfd uffd v3
+
+ - implement can_userfault for guest_memfd
+ - check vma->vm_ops pointer before dereferencing
+ - proper check for VM_UFFD_MINOR
+---
+ include/linux/userfaultfd_k.h | 8 +++++---
+ mm/userfaultfd.c              | 4 +++-
+ virt/kvm/guest_memfd.c        | 7 +++++++
+ 3 files changed, 15 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
+index 47d40cec69c7..b4f5b90f2e40 100644
+--- a/include/linux/userfaultfd_k.h
++++ b/include/linux/userfaultfd_k.h
+@@ -217,9 +217,11 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
+ 	if (vma->vm_flags & VM_DROPPABLE)
+ 		return false;
+ 
+-	if (!vma->vm_ops->can_userfault ||
+-	    !vma->vm_ops->can_userfault(vma, VM_UFFD_MINOR))
+-		return false;
++       if ((vm_flags & VM_UFFD_MINOR) &&
++            (!vma->vm_ops ||
++             !vma->vm_ops->can_userfault ||
++             !vma->vm_ops->can_userfault(vma, VM_UFFD_MINOR)))
++                return false;
+ 
+ 	/*
+ 	 * If wp async enabled, and WP is the only mode enabled, allow any
+diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
+index daf3b93e4d22..795474ab7436 100644
+--- a/mm/userfaultfd.c
++++ b/mm/userfaultfd.c
+@@ -784,7 +784,9 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
+ 		return  mfill_atomic_hugetlb(ctx, dst_vma, dst_start,
+ 					     src_start, len, flags);
+ 
+-	can_userfault = dst_vma->vm_ops->can_userfault &&
++       can_userfault =
++           dst_vma->vm_ops &&
++           dst_vma->vm_ops->can_userfault &&
+ 	    dst_vma->vm_ops->can_userfault(dst_vma, __VM_UFFD_FLAGS);
+ 
+ 	if (!vma_is_anonymous(dst_vma) && !can_userfault)
+diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
+index 41610d501a6f..1f17be5a84a8 100644
+--- a/virt/kvm/guest_memfd.c
++++ b/virt/kvm/guest_memfd.c
+@@ -389,8 +389,15 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
+ 	return ret;
+ }
+ 
++static bool kvm_gmem_can_userfault(struct vm_area_struct *vma,
++                                  unsigned long vm_flags)
++{
++       return vm_flags & VM_UFFD_MINOR;
++}
++
+ static const struct vm_operations_struct kvm_gmem_vm_ops = {
+ 	.fault = kvm_gmem_fault_user_mapping,
++	.can_userfault = kvm_gmem_can_userfault,
+ };
+ 
+ static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
+-- 
+2.51.0
+
diff --git a/resources/hiding_ci/linux_patches/GPL-2.0 b/resources/hiding_ci/linux_patches/GPL-2.0
new file mode 100644
index 00000000000..ff0812fd89c
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/GPL-2.0
@@ -0,0 +1,359 @@
+Valid-License-Identifier: GPL-2.0
+Valid-License-Identifier: GPL-2.0-only
+Valid-License-Identifier: GPL-2.0+
+Valid-License-Identifier: GPL-2.0-or-later
+SPDX-URL: https://spdx.org/licenses/GPL-2.0.html
+Usage-Guide:
+  To use this license in source code, put one of the following SPDX
+  tag/value pairs into a comment according to the placement
+  guidelines in the licensing rules documentation.
+  For 'GNU General Public License (GPL) version 2 only' use:
+    SPDX-License-Identifier: GPL-2.0
+  or
+    SPDX-License-Identifier: GPL-2.0-only
+  For 'GNU General Public License (GPL) version 2 or any later version' use:
+    SPDX-License-Identifier: GPL-2.0+
+  or
+    SPDX-License-Identifier: GPL-2.0-or-later
+License-Text:
+
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/resources/hiding_ci/linux_patches/README.md b/resources/hiding_ci/linux_patches/README.md
new file mode 100644
index 00000000000..8889ed95e77
--- /dev/null
+++ b/resources/hiding_ci/linux_patches/README.md
@@ -0,0 +1,8 @@
+# Linux kernel patches for direct map removal
+
+The Linux kernel patches in this directory and its subdirectories are
+distributed under the `GPL-2.0` licence (see the full licence text at
+[GPL-2.0](./GPL-2.0)). The patches are required by Firecracker's "Secret
+Freedom" feature that removes the VM memory from the host direct map (see
+[lore](https://lore.kernel.org/kvm/20250221160728.1584559-1-roypat@amazon.co.uk/)
+for more details). The patches are not yet merged upstream.
diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml
index c83ea50266a..eaecec3cd44 100644
--- a/src/firecracker/Cargo.toml
+++ b/src/firecracker/Cargo.toml
@@ -49,7 +49,7 @@ regex = { version = "1.11.2", default-features = false, features = [
 
 # Dev-Dependencies for uffd examples
 serde = { version = "1.0.219", features = ["derive"] }
-userfaultfd = "0.9.0"
+userfaultfd = { version = "0.9.0", features = ["linux5_13"] }
 
 [lints]
 workspace = true
diff --git a/src/firecracker/examples/uffd/fault_all_handler.rs b/src/firecracker/examples/uffd/fault_all_handler.rs
index ca7601ebf25..9aadc42670e 100644
--- a/src/firecracker/examples/uffd/fault_all_handler.rs
+++ b/src/firecracker/examples/uffd/fault_all_handler.rs
@@ -5,6 +5,8 @@
 //! which loads the whole region from the backing memory file
 //! when a page fault occurs.
 
+#![allow(clippy::cast_possible_truncation)]
+
 mod uffd_utils;
 
 use std::fs::File;
@@ -23,27 +25,80 @@ fn main() {
     // Get Uffd from UDS. We'll use the uffd to handle PFs for Firecracker.
     let listener = UnixListener::bind(uffd_sock_path).expect("Cannot bind to socket path");
     let (stream, _) = listener.accept().expect("Cannot listen on UDS socket");
+    stream
+        .set_nonblocking(true)
+        .expect("Cannot set non-blocking");
 
     let mut runtime = Runtime::new(stream, file);
     runtime.install_panic_hook();
-    runtime.run(|uffd_handler: &mut UffdHandler| {
-        // Read an event from the userfaultfd.
-        let event = uffd_handler
-            .read_event()
-            .expect("Failed to read uffd_msg")
-            .expect("uffd_msg not ready");
-
-        match event {
-            userfaultfd::Event::Pagefault { .. } => {
-                let start = get_time_us(ClockType::Monotonic);
-                for region in uffd_handler.mem_regions.clone() {
-                    uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
+    runtime.run(
+        |uffd_handler: &mut UffdHandler| {
+            // Read an event from the userfaultfd.
+            let Some(event) = uffd_handler.read_event().expect("Failed to read uffd_msg") else {
+                return;
+            };
+
+            if let userfaultfd::Event::Pagefault { addr, .. } = event {
+                let bit =
+                    uffd_handler.addr_to_offset(addr.cast()) as usize / uffd_handler.page_size;
+
+                // If Secret Free, we know if this is the first fault based on the userfault
+                // bitmap state. Otherwise, we assume that we will ever only receive a single fault
+                // event via UFFD.
+                let are_we_faulted_yet = uffd_handler
+                    .userfault_bitmap
+                    .as_mut()
+                    .is_some_and(|bitmap| !bitmap.is_bit_set(bit));
+
+                if are_we_faulted_yet {
+                    // TODO: we currently ignore the result as we may attempt to
+                    // populate the page that is already present as we may receive
+                    // multiple minor fault events per page.
+                    _ = uffd_handler
+                        .uffd
+                        .r#continue(addr, uffd_handler.page_size, true)
+                        .inspect_err(|err| println!("Error during uffdio_continue: {:?}", err));
+                } else {
+                    fault_all(uffd_handler, addr);
                 }
-                let end = get_time_us(ClockType::Monotonic);
+            }
+        },
+        |_uffd_handler: &mut UffdHandler, _offset: usize| {},
+    );
+}
 
-                println!("Finished Faulting All: {}us", end - start);
+fn fault_all(uffd_handler: &mut UffdHandler, fault_addr: *mut libc::c_void) {
+    let start = get_time_us(ClockType::Monotonic);
+    for region in uffd_handler.mem_regions.clone() {
+        match uffd_handler.guest_memfd {
+            None => {
+                uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
+            }
+            Some(_) => {
+                let written = uffd_handler.populate_via_write(region.offset as usize, region.size);
+
+                // This code is written under the assumption that the first fault triggered by
+                // Firecracker is either due to an MSR write (on x86) or due to device restoration
+                // reading from guest memory to check the virtio queues are sane (on
+                // ARM). This will be reported via a UFFD minor fault which needs to
+                // be handled via memcpy. Importantly, we get to the UFFD handler
+                // with the actual guest_memfd page already faulted in, meaning pwrite will stop
+                // once it gets to the offset of that page (e.g. written < region.size above).
+                // Thus, to fault in everything, we now need to skip this one page, write the
+                // remaining region, and then deal with the "gap" via uffd_handler.serve_pf().
+
+                if written < region.size - uffd_handler.page_size {
+                    let r = uffd_handler.populate_via_write(
+                        region.offset as usize + written + uffd_handler.page_size,
+                        region.size - written - uffd_handler.page_size,
+                    );
+                    assert_eq!(written + r, region.size - uffd_handler.page_size);
+                }
             }
-            _ => panic!("Unexpected event on userfaultfd"),
         }
-    });
+    }
+    uffd_handler.serve_pf(fault_addr.cast(), uffd_handler.page_size);
+    let end = get_time_us(ClockType::Monotonic);
+
+    println!("Finished Faulting All: {}us", end - start);
 }
diff --git a/src/firecracker/examples/uffd/malicious_handler.rs b/src/firecracker/examples/uffd/malicious_handler.rs
index 9af94e057aa..c926b976207 100644
--- a/src/firecracker/examples/uffd/malicious_handler.rs
+++ b/src/firecracker/examples/uffd/malicious_handler.rs
@@ -21,17 +21,23 @@ fn main() {
     // Get Uffd from UDS. We'll use the uffd to handle PFs for Firecracker.
     let listener = UnixListener::bind(uffd_sock_path).expect("Cannot bind to socket path");
     let (stream, _) = listener.accept().expect("Cannot listen on UDS socket");
+    stream
+        .set_nonblocking(true)
+        .expect("Cannot set non-blocking");
 
     let mut runtime = Runtime::new(stream, file);
-    runtime.run(|uffd_handler: &mut UffdHandler| {
-        // Read an event from the userfaultfd.
-        let event = uffd_handler
-            .read_event()
-            .expect("Failed to read uffd_msg")
-            .expect("uffd_msg not ready");
-
-        if let userfaultfd::Event::Pagefault { .. } = event {
-            panic!("Fear me! I am the malicious page fault handler.")
-        }
-    });
+    runtime.run(
+        |uffd_handler: &mut UffdHandler| {
+            // Read an event from the userfaultfd.
+            let event = uffd_handler
+                .read_event()
+                .expect("Failed to read uffd_msg")
+                .expect("uffd_msg not ready");
+
+            if let userfaultfd::Event::Pagefault { .. } = event {
+                panic!("Fear me! I am the malicious page fault handler.")
+            }
+        },
+        |_uffd_handler: &mut UffdHandler, _offset: usize| {},
+    );
 }
diff --git a/src/firecracker/examples/uffd/on_demand_handler.rs b/src/firecracker/examples/uffd/on_demand_handler.rs
index 3be958b3578..3b8bc0a9288 100644
--- a/src/firecracker/examples/uffd/on_demand_handler.rs
+++ b/src/firecracker/examples/uffd/on_demand_handler.rs
@@ -5,6 +5,8 @@
 //! which loads the whole region from the backing memory file
 //! when a page fault occurs.
 
+#![allow(clippy::cast_possible_truncation)]
+
 mod uffd_utils;
 
 use std::fs::File;
@@ -22,84 +24,130 @@ fn main() {
     // Get Uffd from UDS. We'll use the uffd to handle PFs for Firecracker.
     let listener = UnixListener::bind(uffd_sock_path).expect("Cannot bind to socket path");
     let (stream, _) = listener.accept().expect("Cannot listen on UDS socket");
+    stream
+        .set_nonblocking(true)
+        .expect("Cannot set non-blocking");
 
     let mut runtime = Runtime::new(stream, file);
     runtime.install_panic_hook();
-    runtime.run(|uffd_handler: &mut UffdHandler| {
-        // !DISCLAIMER!
-        // When using UFFD together with the balloon device, this handler needs to deal with
-        // `remove` and `pagefault` events. There are multiple things to keep in mind in
-        // such setups:
-        //
-        // As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
-        // -----------------------------------------------------------------------------------
-        //
-        // This means we cannot process UFFD events simply one-by-one anymore - if a `remove` event
-        // arrives, we need to pre-fetch all other events up to the `remove` event, to unblock the
-        // UFFD, and then go back to the process the pre-fetched events.
-        //
-        // UFFD might receive events in not in their causal order
-        // -----------------------------------------------------
-        //
-        // For example, the guest
-        // kernel might first respond to a balloon inflation by freeing some memory, and
-        // telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
-        // free memory range, which causes a `remove` event to be sent to UFFD. Then, the
-        // guest kernel might immediately fault the page in again (for example because
-        // default_on_oom was set). which causes a `pagefault` event to be sent to UFFD.
-        //
-        // However, the pagefault will be triggered from inside KVM on the vCPU thread, while the
-        // balloon device is handled by Firecracker on its VMM thread. This means that potentially
-        // this handler can receive the `pagefault` _before_ the `remove` event.
-        //
-        // This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
-        // to make sure no `remove` event is blocking us can result in the handler acting on
-        // the `pagefault` event before the `remove` message (despite the `remove` event being
-        // in the causal past of the `pagefault` event), which means that we will fault in a page
-        // from the snapshot file, while really we should be faulting in a zero page.
-        //
-        // In this example handler, we ignore this problem, to avoid
-        // complexity (under the assumption that the guest kernel will zero a newly faulted in
-        // page anyway). A production handler will most likely want to ensure that `remove`
-        // events for a specific range are always handled before `pagefault` events.
-        //
-        // Lastly, we still need to deal with the race condition where a `remove` event arrives
-        // in the UFFD queue after we got done reading all events, in which case we need to go
-        // back to reading more events before we can continue processing `pagefault`s.
-        let mut deferred_events = Vec::new();
+    runtime.run(
+        |uffd_handler: &mut UffdHandler| {
+            // !DISCLAIMER!
+            // When using UFFD together with the balloon device, this handler needs to deal with
+            // `remove` and `pagefault` events. There are multiple things to keep in mind in
+            // such setups:
+            //
+            // As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
+            // -----------------------------------------------------------------------------------
+            //
+            // This means we cannot process UFFD events simply one-by-one anymore - if a `remove`
+            // event arrives, we need to pre-fetch all other events up to the `remove`
+            // event, to unblock the UFFD, and then go back to the process the
+            // pre-fetched events.
+            //
+            // UFFD might receive events in not in their causal order
+            // -----------------------------------------------------
+            //
+            // For example, the guest
+            // kernel might first respond to a balloon inflation by freeing some memory, and
+            // telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
+            // free memory range, which causes a `remove` event to be sent to UFFD. Then, the
+            // guest kernel might immediately fault the page in again (for example because
+            // default_on_oom was set). which causes a `pagefault` event to be sent to UFFD.
+            //
+            // However, the pagefault will be triggered from inside KVM on the vCPU thread, while
+            // the balloon device is handled by Firecracker on its VMM thread. This
+            // means that potentially this handler can receive the `pagefault` _before_
+            // the `remove` event.
+            //
+            // This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
+            // to make sure no `remove` event is blocking us can result in the handler acting on
+            // the `pagefault` event before the `remove` message (despite the `remove` event being
+            // in the causal past of the `pagefault` event), which means that we will fault in a
+            // page from the snapshot file, while really we should be faulting in a zero
+            // page.
+            //
+            // In this example handler, we ignore this problem, to avoid
+            // complexity (under the assumption that the guest kernel will zero a newly faulted in
+            // page anyway). A production handler will most likely want to ensure that `remove`
+            // events for a specific range are always handled before `pagefault` events.
+            //
+            // Lastly, we still need to deal with the race condition where a `remove` event arrives
+            // in the UFFD queue after we got done reading all events, in which case we need to go
+            // back to reading more events before we can continue processing `pagefault`s.
+            let mut deferred_events = Vec::new();
 
-        loop {
-            // First, try events that we couldn't handle last round
-            let mut events_to_handle = Vec::from_iter(deferred_events.drain(..));
+            loop {
+                // First, try events that we couldn't handle last round
+                let mut events_to_handle = Vec::from_iter(deferred_events.drain(..));
 
-            // Read all events from the userfaultfd.
-            while let Some(event) = uffd_handler.read_event().expect("Failed to read uffd_msg") {
-                events_to_handle.push(event);
-            }
+                // Read all events from the userfaultfd.
+                while let Some(event) = uffd_handler.read_event().expect("Failed to read uffd_msg")
+                {
+                    events_to_handle.push(event);
+                }
+
+                for event in events_to_handle.drain(..) {
+                    // We expect to receive either a Page Fault or `remove`
+                    // event (if the balloon device is enabled).
+                    match event {
+                        userfaultfd::Event::Pagefault { addr, .. } => {
+                            let bit = uffd_handler.addr_to_offset(addr.cast()) as usize
+                                / uffd_handler.page_size;
 
-            for event in events_to_handle.drain(..) {
-                // We expect to receive either a Page Fault or `remove`
-                // event (if the balloon device is enabled).
-                match event {
-                    userfaultfd::Event::Pagefault { addr, .. } => {
-                        if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
-                            deferred_events.push(event);
+                            if uffd_handler.userfault_bitmap.is_some() {
+                                if uffd_handler
+                                    .userfault_bitmap
+                                    .as_mut()
+                                    .unwrap()
+                                    .is_bit_set(bit)
+                                {
+                                    if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
+                                        deferred_events.push(event);
+                                    }
+                                } else {
+                                    // TODO: we currently ignore the result as we may attempt to
+                                    // populate the page that is already present as we may receive
+                                    // multiple minor fault events per page.
+                                    let _ = uffd_handler
+                                        .uffd
+                                        .r#continue(addr.cast(), uffd_handler.page_size, true)
+                                        .inspect_err(|err| {
+                                            println!("uffdio_continue error: {:?}", err)
+                                        });
+                                }
+                            } else if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
+                                deferred_events.push(event);
+                            }
                         }
+                        userfaultfd::Event::Remove { start, end } => {
+                            uffd_handler.mark_range_removed(start as u64, end as u64)
+                        }
+                        _ => panic!("Unexpected event on userfaultfd"),
                     }
-                    userfaultfd::Event::Remove { start, end } => {
-                        uffd_handler.mark_range_removed(start as u64, end as u64)
-                    }
-                    _ => panic!("Unexpected event on userfaultfd"),
+                }
+
+                // We assume that really only the above removed/pagefault interaction can result in
+                // deferred events. In that scenario, the loop will always terminate (unless
+                // newly arriving `remove` events end up indefinitely blocking it, but there's
+                // nothing we can do about that, and it's a largely theoretical
+                // problem).
+                if deferred_events.is_empty() {
+                    break;
                 }
             }
+        },
+        |uffd_handler: &mut UffdHandler, offset: usize| {
+            let bytes_written = uffd_handler.populate_via_write(offset, uffd_handler.page_size);
 
-            // We assume that really only the above removed/pagefault interaction can result in
-            // deferred events. In that scenario, the loop will always terminate (unless
-            // newly arriving `remove` events end up indefinitely blocking it, but there's nothing
-            // we can do about that, and it's a largely theoretical problem).
-            if deferred_events.is_empty() {
-                break;
+            if bytes_written == 0 {
+                println!(
+                    "got a vcpu fault for an already populated page at offset {}",
+                    offset
+                );
+            } else {
+                assert_eq!(bytes_written, uffd_handler.page_size);
             }
-        }
-    });
+        },
+    );
 }
diff --git a/src/firecracker/examples/uffd/uffd_utils.rs b/src/firecracker/examples/uffd/uffd_utils.rs
index b00a9b8c143..480e09e3ad7 100644
--- a/src/firecracker/examples/uffd/uffd_utils.rs
+++ b/src/firecracker/examples/uffd/uffd_utils.rs
@@ -5,22 +5,32 @@
     clippy::cast_possible_truncation,
     clippy::cast_sign_loss,
     clippy::undocumented_unsafe_blocks,
+    clippy::ptr_as_ptr,
+    clippy::cast_possible_wrap,
     // Not everything is used by both binaries
     dead_code
 )]
 
-use std::collections::{HashMap, HashSet};
+mod userfault_bitmap;
+
+use std::collections::HashSet;
 use std::ffi::c_void;
 use std::fs::File;
+use std::io::{Read, Write};
+use std::num::NonZero;
 use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd};
 use std::os::unix::net::UnixStream;
 use std::ptr;
+use std::sync::atomic::AtomicU64;
 use std::time::Duration;
 
 use serde::{Deserialize, Serialize};
+use serde_json::{Deserializer, StreamDeserializer};
 use userfaultfd::{Error, Event, Uffd};
 use vmm_sys_util::sock_ctrl_msg::ScmSocket;
 
+use crate::uffd_utils::userfault_bitmap::UserfaultBitmap;
+
 // This is the same with the one used in src/vmm.
 /// This describes the mapping between Firecracker base virtual address and offset in the
 /// buffer or file backend for a guest memory region. It is used to tell an external
@@ -41,6 +51,66 @@ pub struct GuestRegionUffdMapping {
     pub page_size: usize,
 }
 
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
+pub struct FaultRequest {
+    /// vCPU that encountered the fault
+    pub vcpu: u32,
+    /// Offset in guest_memfd where the fault occured
+    pub offset: u64,
+    /// Flags
+    pub flags: u64,
+    /// Async PF token
+    pub token: Option<u32>,
+}
+
+impl FaultRequest {
+    pub fn into_reply(self, len: u64) -> FaultReply {
+        FaultReply {
+            vcpu: Some(self.vcpu),
+            offset: self.offset,
+            len,
+            flags: self.flags,
+            token: self.token,
+            zero: false,
+        }
+    }
+}
+
+/// FaultReply
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
+pub struct FaultReply {
+    /// vCPU that encountered the fault, from `FaultRequest` (if present, otherwise 0)
+    pub vcpu: Option<u32>,
+    /// Offset in guest_memfd where population started
+    pub offset: u64,
+    /// Length of populated area
+    pub len: u64,
+    /// Flags, must be copied from `FaultRequest`, otherwise 0
+    pub flags: u64,
+    /// Async PF token, must be copied from `FaultRequest`, otherwise None
+    pub token: Option<u32>,
+    /// Whether the populated pages are zero pages
+    pub zero: bool,
+}
+
+/// UffdMsgFromFirecracker
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(untagged)]
+pub enum UffdMsgFromFirecracker {
+    /// Mappings
+    Mappings(Vec<GuestRegionUffdMapping>),
+    /// FaultReq
+    FaultReq(FaultRequest),
+}
+
+/// UffdMsgToFirecracker
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(untagged)]
+pub enum UffdMsgToFirecracker {
+    /// FaultRep
+    FaultRep(FaultReply),
+}
+
 impl GuestRegionUffdMapping {
     fn contains(&self, fault_page_addr: u64) -> bool {
         fault_page_addr >= self.base_host_virt_addr
@@ -53,8 +123,11 @@ pub struct UffdHandler {
     pub mem_regions: Vec<GuestRegionUffdMapping>,
     pub page_size: usize,
     backing_buffer: *const u8,
-    uffd: Uffd,
+    pub uffd: Uffd,
     removed_pages: HashSet<u64>,
+    pub guest_memfd: Option<File>,
+    pub guest_memfd_addr: Option<*mut u8>,
+    pub userfault_bitmap: Option<UserfaultBitmap>,
 }
 
 impl UffdHandler {
@@ -98,17 +171,37 @@ impl UffdHandler {
         panic!("Could not get UFFD and mappings after 5 retries");
     }
 
-    pub fn from_unix_stream(stream: &UnixStream, backing_buffer: *const u8, size: usize) -> Self {
-        let (body, file) = Self::get_mappings_and_file(stream);
-        let mappings =
-            serde_json::from_str::<Vec<GuestRegionUffdMapping>>(&body).unwrap_or_else(|_| {
-                panic!("Cannot deserialize memory mappings. Received body: {body}")
-            });
+    fn mmap_helper(len: libc::size_t, fd: libc::c_int) -> *mut libc::c_void {
+        // SAFETY: `mmap` is a safe function to call with valid parameters.
+        let ret = unsafe {
+            libc::mmap(
+                ptr::null_mut(),
+                len,
+                libc::PROT_WRITE,
+                libc::MAP_SHARED,
+                fd,
+                0,
+            )
+        };
+
+        assert_ne!(ret, libc::MAP_FAILED);
+
+        ret
+    }
+
+    pub fn from_mappings(
+        mappings: Vec<GuestRegionUffdMapping>,
+        uffd: File,
+        guest_memfd: Option<File>,
+        userfault_bitmap_memfd: Option<File>,
+        backing_buffer: *const u8,
+        size: usize,
+    ) -> Self {
         let memsize: usize = mappings.iter().map(|r| r.size).sum();
         // Page size is the same for all memory regions, so just grab the first one
         let first_mapping = mappings.first().unwrap_or_else(|| {
             panic!(
-                "Cannot get the first mapping. Mappings size is {}. Received body: {body}",
+                "Cannot get the first mapping. Mappings size is {}.",
                 mappings.len()
             )
         });
@@ -118,14 +211,46 @@ impl UffdHandler {
         assert_eq!(memsize, size);
         assert!(page_size.is_power_of_two());
 
-        let uffd = unsafe { Uffd::from_raw_fd(file.into_raw_fd()) };
-
-        Self {
-            mem_regions: mappings,
-            page_size,
-            backing_buffer,
-            uffd,
-            removed_pages: HashSet::new(),
+        let uffd = unsafe { Uffd::from_raw_fd(uffd.into_raw_fd()) };
+
+        match (&guest_memfd, &userfault_bitmap_memfd) {
+            (Some(guestmem_file), Some(bitmap_file)) => {
+                let guest_memfd_addr =
+                    Some(Self::mmap_helper(size, guestmem_file.as_raw_fd()) as *mut u8);
+
+                let bitmap_ptr = Self::mmap_helper(size, bitmap_file.as_raw_fd()) as *mut AtomicU64;
+
+                // SAFETY: The bitmap pointer is valid and the size is correct.
+                let userfault_bitmap = Some(unsafe {
+                    UserfaultBitmap::new(bitmap_ptr, memsize, NonZero::new(page_size).unwrap())
+                });
+
+                Self {
+                    mem_regions: mappings,
+                    page_size,
+                    backing_buffer,
+                    uffd,
+                    removed_pages: HashSet::new(),
+                    guest_memfd,
+                    guest_memfd_addr,
+                    userfault_bitmap,
+                }
+            }
+            (None, None) => Self {
+                mem_regions: mappings,
+                page_size,
+                backing_buffer,
+                uffd,
+                removed_pages: HashSet::new(),
+                guest_memfd: None,
+                guest_memfd_addr: None,
+                userfault_bitmap: None,
+            },
+            (_, _) => {
+                panic!(
+                    "Only both guest_memfd and userfault_bitmap_memfd can be set at the same time."
+                );
+            }
         }
     }
 
@@ -142,6 +267,20 @@ impl UffdHandler {
         }
     }
 
+    pub fn addr_to_offset(&self, addr: *mut u8) -> u64 {
+        let addr = addr as u64;
+        for region in &self.mem_regions {
+            if region.contains(addr) {
+                return addr - region.base_host_virt_addr + region.offset;
+            }
+        }
+
+        panic!(
+            "Could not find addr: {:#x} within guest region mappings.",
+            addr
+        );
+    }
+
     pub fn serve_pf(&mut self, addr: *mut u8, len: usize) -> bool {
         // Find the start of the page that the current faulting address belongs to.
         let dst = (addr as usize & !(self.page_size - 1)) as *mut libc::c_void;
@@ -154,7 +293,7 @@ impl UffdHandler {
 
         for region in self.mem_regions.iter() {
             if region.contains(fault_page_addr) {
-                return self.populate_from_file(region, fault_page_addr, len);
+                return self.populate_from_file(&region.clone(), fault_page_addr, len);
             }
         }
 
@@ -164,12 +303,65 @@ impl UffdHandler {
         );
     }
 
-    fn populate_from_file(&self, region: &GuestRegionUffdMapping, dst: u64, len: usize) -> bool {
-        let offset = dst - region.base_host_virt_addr;
-        let src = self.backing_buffer as u64 + region.offset + offset;
+    pub fn size(&self) -> usize {
+        self.mem_regions.iter().map(|r| r.size).sum()
+    }
+
+    pub fn populate_via_write(&mut self, offset: usize, len: usize) -> usize {
+        // man 2 write:
+        //
+        //    On Linux, write() (and similar system calls) will transfer at most
+        //    0x7ffff000 (2,147,479,552) bytes, returning the number of bytes
+        //    actually transferred.  (This is true on both 32-bit and 64-bit
+        //    systems.)
+        const MAX_WRITE_LEN: usize = 2_147_479_552;
+
+        assert!(
+            offset.checked_add(len).unwrap() <= self.size(),
+            "{} + {} >= {}",
+            offset,
+            len,
+            self.size()
+        );
 
+        let mut total_written = 0;
+
+        while total_written < len {
+            let src = unsafe { self.backing_buffer.add(offset + total_written) };
+            let len_to_write = (len - total_written).min(MAX_WRITE_LEN);
+            let bytes_written = unsafe {
+                libc::pwrite64(
+                    self.guest_memfd.as_ref().unwrap().as_raw_fd(),
+                    src.cast(),
+                    len_to_write,
+                    (offset + total_written) as libc::off64_t,
+                )
+            };
+
+            let bytes_written = match bytes_written {
+                -1 if vmm_sys_util::errno::Error::last().errno() == libc::ENOSPC => 0,
+                written @ 0.. => written as usize,
+                _ => panic!("{:?}", std::io::Error::last_os_error()),
+            };
+
+            self.userfault_bitmap
+                .as_mut()
+                .unwrap()
+                .reset_addr_range(offset + total_written, bytes_written);
+
+            total_written += bytes_written;
+
+            if bytes_written != len_to_write {
+                break;
+            }
+        }
+
+        total_written
+    }
+
+    fn populate_via_uffdio_copy(&self, src: *const u8, dst: u64, len: usize) -> bool {
         unsafe {
-            match self.uffd.copy(src as *const _, dst as *mut _, len, true) {
+            match self.uffd.copy(src.cast(), dst as *mut _, len, true) {
                 // Make sure the UFFD copied some bytes.
                 Ok(value) => assert!(value > 0),
                 // Catch EAGAIN errors, which occur when a `remove` event lands in the UFFD
@@ -194,6 +386,44 @@ impl UffdHandler {
         true
     }
 
+    fn populate_via_memcpy(&mut self, src: *const u8, dst: u64, offset: usize, len: usize) -> bool {
+        let dst_memcpy = unsafe {
+            self.guest_memfd_addr
+                .expect("no guest_memfd addr")
+                .add(offset)
+        };
+
+        unsafe {
+            std::ptr::copy_nonoverlapping(src, dst_memcpy, len);
+        }
+
+        self.userfault_bitmap
+            .as_mut()
+            .unwrap()
+            .reset_addr_range(offset, len);
+
+        self.uffd
+            .r#continue(dst as _, len, true)
+            .expect("uffd_continue");
+
+        true
+    }
+
+    fn populate_from_file(
+        &mut self,
+        region: &GuestRegionUffdMapping,
+        dst: u64,
+        len: usize,
+    ) -> bool {
+        let offset = (region.offset + dst - region.base_host_virt_addr) as usize;
+        let src = unsafe { self.backing_buffer.add(offset) };
+
+        match self.guest_memfd {
+            Some(_) => self.populate_via_memcpy(src, dst, offset, len),
+            None => self.populate_via_uffdio_copy(src, dst, len),
+        }
+    }
+
     fn zero_out(&mut self, addr: u64) -> bool {
         match unsafe { self.uffd.zeropage(addr as *mut _, self.page_size, true) } {
             Ok(_) => true,
@@ -203,13 +433,65 @@ impl UffdHandler {
     }
 }
 
+struct UffdMsgIterator {
+    stream: UnixStream,
+    buffer: Vec<u8>,
+    current_pos: usize,
+}
+
+impl Iterator for UffdMsgIterator {
+    type Item = FaultRequest;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.stream.read(&mut self.buffer[self.current_pos..]) {
+            Ok(bytes_read) => self.current_pos += bytes_read,
+            Err(e) if e.kind() == std::io::ErrorKind::WouldBlock => {
+                // Continue with existing buffer data
+            }
+            Err(e) => panic!("Failed to read from stream: {}", e,),
+        }
+
+        if self.current_pos == 0 {
+            return None;
+        }
+
+        let str_slice = std::str::from_utf8(&self.buffer[..self.current_pos]).unwrap();
+        let mut stream: StreamDeserializer<_, Self::Item> =
+            Deserializer::from_str(str_slice).into_iter();
+
+        match stream.next()? {
+            Ok(value) => {
+                let consumed = stream.byte_offset();
+                self.buffer.copy_within(consumed..self.current_pos, 0);
+                self.current_pos -= consumed;
+                Some(value)
+            }
+            Err(e) => panic!(
+                "Failed to deserialize JSON message: {}. Error: {}",
+                String::from_utf8_lossy(&self.buffer[..self.current_pos]),
+                e
+            ),
+        }
+    }
+}
+
+impl UffdMsgIterator {
+    fn new(stream: UnixStream) -> Self {
+        Self {
+            stream,
+            buffer: vec![0u8; 4096],
+            current_pos: 0,
+        }
+    }
+}
+
 #[derive(Debug)]
 pub struct Runtime {
     stream: UnixStream,
     backing_file: File,
     backing_memory: *mut u8,
     backing_memory_size: usize,
-    uffds: HashMap<i32, UffdHandler>,
+    handler: UffdHandler,
 }
 
 impl Runtime {
@@ -234,12 +516,14 @@ impl Runtime {
             panic!("mmap on backing file failed");
         }
 
+        let handler = Runtime::construct_handler(&stream, ret.cast(), backing_memory_size);
+
         Self {
             stream,
             backing_file,
             backing_memory: ret.cast(),
             backing_memory_size,
-            uffds: HashMap::default(),
+            handler,
         }
     }
 
@@ -280,12 +564,59 @@ impl Runtime {
         }));
     }
 
+    pub fn send_fault_reply(&mut self, fault_reply: FaultReply) {
+        let reply = UffdMsgToFirecracker::FaultRep(fault_reply);
+        let reply_json = serde_json::to_string(&reply).unwrap();
+        self.stream.write_all(reply_json.as_bytes()).unwrap();
+    }
+
+    pub fn construct_handler(
+        stream: &UnixStream,
+        backing_memory: *mut u8,
+        backing_memory_size: usize,
+    ) -> UffdHandler {
+        let mut message_buf = vec![0u8; 1024];
+        let mut iovecs = [libc::iovec {
+            iov_base: message_buf.as_mut_ptr() as *mut libc::c_void,
+            iov_len: message_buf.len(),
+        }];
+        let mut fds = [0; 3];
+        let (bytes_read, fds_read) = unsafe {
+            stream
+                .recv_with_fds(&mut iovecs, &mut fds)
+                .expect("recv_with_fds failed")
+        };
+        message_buf.resize(bytes_read, 0);
+
+        let (guest_memfd, userfault_bitmap_memfd) = if fds_read == 3 {
+            (
+                Some(unsafe { File::from_raw_fd(fds[1]) }),
+                Some(unsafe { File::from_raw_fd(fds[2]) }),
+            )
+        } else {
+            (None, None)
+        };
+
+        UffdHandler::from_mappings(
+            serde_json::from_slice(message_buf.as_slice()).unwrap(),
+            unsafe { File::from_raw_fd(fds[0]) },
+            guest_memfd,
+            userfault_bitmap_memfd,
+            backing_memory,
+            backing_memory_size,
+        )
+    }
+
     /// Polls the `UnixStream` and UFFD fds in a loop.
     /// When stream is polled, new uffd is retrieved.
     /// When uffd is polled, page fault is handled by
     /// calling `pf_event_dispatch` with corresponding
     /// uffd object passed in.
-    pub fn run(&mut self, pf_event_dispatch: impl Fn(&mut UffdHandler)) {
+    pub fn run(
+        &mut self,
+        pf_event_dispatch: impl Fn(&mut UffdHandler),
+        pf_vcpu_event_dispatch: impl Fn(&mut UffdHandler, usize),
+    ) {
         let mut pollfds = vec![];
 
         // Poll the stream for incoming uffds
@@ -295,6 +626,15 @@ impl Runtime {
             revents: 0,
         });
 
+        pollfds.push(libc::pollfd {
+            fd: self.handler.uffd.as_raw_fd(),
+            events: libc::POLLIN,
+            revents: 0,
+        });
+
+        let mut uffd_msg_iter =
+            UffdMsgIterator::new(self.stream.try_clone().expect("Failed to clone stream"));
+
         loop {
             let pollfd_ptr = pollfds.as_mut_ptr();
             let pollfd_size = pollfds.len() as u64;
@@ -307,28 +647,32 @@ impl Runtime {
                 panic!("Could not poll for events!")
             }
 
-            for i in 0..pollfds.len() {
+            for fd in &pollfds {
                 if nready == 0 {
                     break;
                 }
-                if pollfds[i].revents & libc::POLLIN != 0 {
+                if fd.revents & libc::POLLIN != 0 {
                     nready -= 1;
-                    if pollfds[i].fd == self.stream.as_raw_fd() {
-                        // Handle new uffd from stream
-                        let handler = UffdHandler::from_unix_stream(
-                            &self.stream,
-                            self.backing_memory,
-                            self.backing_memory_size,
-                        );
-                        pollfds.push(libc::pollfd {
-                            fd: handler.uffd.as_raw_fd(),
-                            events: libc::POLLIN,
-                            revents: 0,
-                        });
-                        self.uffds.insert(handler.uffd.as_raw_fd(), handler);
+                    if fd.fd == self.stream.as_raw_fd() {
+                        for fault_request in uffd_msg_iter.by_ref() {
+                            let page_size = self.handler.page_size;
+
+                            assert!(
+                                (fault_request.offset as usize) < self.handler.size(),
+                                "received bogus offset from firecracker"
+                            );
+
+                            // Handle one of FaultRequest page faults
+                            pf_vcpu_event_dispatch(
+                                &mut self.handler,
+                                fault_request.offset as usize,
+                            );
+
+                            self.send_fault_reply(fault_request.into_reply(page_size as u64));
+                        }
                     } else {
                         // Handle one of uffd page faults
-                        pf_event_dispatch(self.uffds.get_mut(&pollfds[i].fd).unwrap());
+                        pf_event_dispatch(&mut self.handler);
                     }
                 }
             }
@@ -372,7 +716,7 @@ mod tests {
             let (stream, _) = listener.accept().expect("Cannot listen on UDS socket");
             // Update runtime with actual runtime
             let runtime = uninit_runtime.write(Runtime::new(stream, file));
-            runtime.run(|_: &mut UffdHandler| {});
+            runtime.run(|_: &mut UffdHandler| {}, |_: &mut UffdHandler, _: usize| {});
         });
 
         // wait for runtime thread to initialize itself
@@ -381,6 +725,7 @@ mod tests {
         let stream =
             UnixStream::connect(dummy_socket_path_clone).expect("Cannot connect to the socket");
 
+        #[allow(deprecated)]
         let dummy_memory_region = vec![GuestRegionUffdMapping {
             base_host_virt_addr: 0,
             size: 0x1000,
@@ -389,31 +734,26 @@ mod tests {
         }];
         let dummy_memory_region_json = serde_json::to_string(&dummy_memory_region).unwrap();
 
-        let dummy_file_1 = TempFile::new().unwrap();
-        let dummy_fd_1 = dummy_file_1.as_file().as_raw_fd();
-        stream
-            .send_with_fd(dummy_memory_region_json.as_bytes(), dummy_fd_1)
-            .unwrap();
-        // wait for the runtime thread to process message
-        std::thread::sleep(std::time::Duration::from_millis(100));
-        unsafe {
-            assert_eq!((*runtime_ptr).uffds.len(), 1);
-        }
-
-        let dummy_file_2 = TempFile::new().unwrap();
-        let dummy_fd_2 = dummy_file_2.as_file().as_raw_fd();
+        // Send the mapping message to the runtime.
+        // We expect for the runtime to create a corresponding UffdHandler
+        let dummy_file = TempFile::new().unwrap();
+        let dummy_fd = dummy_file.as_file().as_raw_fd();
         stream
-            .send_with_fd(dummy_memory_region_json.as_bytes(), dummy_fd_2)
+            .send_with_fd(dummy_memory_region_json.as_bytes(), dummy_fd)
             .unwrap();
         // wait for the runtime thread to process message
         std::thread::sleep(std::time::Duration::from_millis(100));
         unsafe {
-            assert_eq!((*runtime_ptr).uffds.len(), 2);
+            assert_eq!(
+                (*runtime_ptr).handler.mem_regions.len(),
+                dummy_memory_region.len()
+            );
         }
 
         // there is no way to properly stop runtime, so
         // we send a message with an incorrect memory region
         // to cause runtime thread to panic
+        #[allow(deprecated)]
         let error_memory_region = vec![GuestRegionUffdMapping {
             base_host_virt_addr: 0,
             size: 0,
@@ -422,7 +762,7 @@ mod tests {
         }];
         let error_memory_region_json = serde_json::to_string(&error_memory_region).unwrap();
         stream
-            .send_with_fd(error_memory_region_json.as_bytes(), dummy_fd_2)
+            .send_with_fd(error_memory_region_json.as_bytes(), dummy_fd)
             .unwrap();
 
         runtime_thread.join().unwrap_err();
diff --git a/src/firecracker/examples/uffd/uffd_utils/userfault_bitmap.rs b/src/firecracker/examples/uffd/uffd_utils/userfault_bitmap.rs
new file mode 100644
index 00000000000..7a751fa0ef2
--- /dev/null
+++ b/src/firecracker/examples/uffd/uffd_utils/userfault_bitmap.rs
@@ -0,0 +1,203 @@
+// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use std::num::NonZeroUsize;
+use std::sync::atomic::{AtomicU64, Ordering};
+
+/// `UserfaultBitmap` implements a simple bit map on the page level with test and set operations.
+/// It is page-size aware, so it converts addresses to page numbers before setting or clearing
+/// the bits.
+#[derive(Debug)]
+pub struct UserfaultBitmap {
+    map: *mut AtomicU64,
+    size: usize,
+    byte_size: usize,
+    page_size: NonZeroUsize,
+    map_size: usize,
+}
+
+impl UserfaultBitmap {
+    /// Create a new bitmap using a user-supplied pointer.
+    ///
+    /// # Safety
+    ///
+    /// Caller must ensure:
+    /// * `map_ptr` points to a valid region of memory containing initialized `AtomicU64` elements
+    /// * `map_ptr` is properly aligned for `AtomicU64`
+    /// * The memory region contains enough space for `ceil(ceil(byte_size/page_size)/64)` elements
+    /// * The memory region pointed to by `map_ptr` must not be accessed through any other means
+    ///   while this `UserfaultBitmap` exists
+    /// * The caller must ensure the memory remains valid for the lifetime of the returned
+    ///   `UserfaultBitmap`
+    pub unsafe fn new(map_ptr: *mut AtomicU64, byte_size: usize, page_size: NonZeroUsize) -> Self {
+        let num_pages = byte_size.div_ceil(page_size.get());
+        let map_size = num_pages.div_ceil(u64::BITS as usize);
+
+        UserfaultBitmap {
+            map: map_ptr,
+            size: num_pages,
+            byte_size,
+            page_size,
+            map_size,
+        }
+    }
+
+    /// Is bit `n` set? Bits outside the range of the bitmap are always unset.
+    pub fn is_bit_set(&self, index: usize) -> bool {
+        if index < self.size {
+            unsafe {
+                let map_entry = &*self.map.add(index >> 6);
+                (map_entry.load(Ordering::Acquire) & (1 << (index & 63))) != 0
+            }
+        } else {
+            // Out-of-range bits are always unset.
+            false
+        }
+    }
+
+    /// Reset a range of `len` bytes starting at `start_addr`. The first bit set in the bitmap
+    /// is for the page corresponding to `start_addr`, and the last bit that we set corresponds
+    /// to address `start_addr + len - 1`.
+    pub fn reset_addr_range(&self, start_addr: usize, len: usize) {
+        if len == 0 {
+            return;
+        }
+
+        let first_bit = start_addr / self.page_size;
+        let last_bit = start_addr.saturating_add(len - 1) / self.page_size;
+
+        for n in first_bit..=last_bit {
+            if n >= self.size {
+                break;
+            }
+            unsafe {
+                let map_entry = &*self.map.add(n >> 6);
+                map_entry.fetch_and(!(1 << (n & 63)), Ordering::SeqCst);
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::atomic::AtomicU64;
+
+    use super::*;
+
+    // Helper function to create a test bitmap
+    fn setup_test_bitmap(
+        byte_size: usize,
+        page_size: NonZeroUsize,
+    ) -> (Vec<AtomicU64>, UserfaultBitmap) {
+        let num_pages = byte_size.div_ceil(page_size.get());
+        let map_size = num_pages.div_ceil(u64::BITS as usize);
+        let mut memory = Vec::with_capacity(map_size);
+        for _ in 0..map_size {
+            memory.push(AtomicU64::new(0));
+        }
+        let ptr = memory.as_mut_ptr();
+        let bitmap = unsafe { UserfaultBitmap::new(ptr, byte_size, page_size) };
+        (memory, bitmap)
+    }
+
+    #[test]
+    fn test_basic_initialization() {
+        let page_size = NonZeroUsize::new(128).unwrap();
+        let (_memory, bitmap) = setup_test_bitmap(1024, page_size);
+
+        assert!(!bitmap.is_bit_set(0));
+        assert!(!bitmap.is_bit_set(7));
+    }
+
+    #[test]
+    fn test_out_of_bounds_access() {
+        let page_size = NonZeroUsize::new(128).unwrap();
+        let (_memory, bitmap) = setup_test_bitmap(1024, page_size);
+
+        // With 1024 bytes and 128-byte pages, we should have 8 pages
+        assert!(!bitmap.is_bit_set(8)); // This should be out of bounds
+        assert!(!bitmap.is_bit_set(100)); // This should be out of bounds
+    }
+
+    #[test]
+    fn test_reset_addr_range() {
+        let page_size = NonZeroUsize::new(128).unwrap();
+        let (memory, bitmap) = setup_test_bitmap(1024, page_size);
+
+        // Set bits 0 and 1 (representing first two pages)
+        memory[0].store(0b11, Ordering::SeqCst);
+
+        // Verify bits are set
+        assert!(bitmap.is_bit_set(0));
+        assert!(bitmap.is_bit_set(1));
+        assert!(!bitmap.is_bit_set(2));
+
+        // Reset first page
+        bitmap.reset_addr_range(0, 128);
+
+        // Verify first bit is reset but second remains set
+        assert!(!bitmap.is_bit_set(0));
+        assert!(bitmap.is_bit_set(1));
+    }
+
+    #[test]
+    fn test_reset_addr_range_spanning_multiple_words() {
+        let page_size = NonZeroUsize::new(128).unwrap();
+        // Ensure we allocate enough space for at least 2 words (128 bits)
+        let (memory, bitmap) = setup_test_bitmap(128 * 128, page_size); // 128 pages
+
+        // Set bits in different words
+        memory[0].store(u64::MAX, Ordering::SeqCst);
+        memory[1].store(u64::MAX, Ordering::SeqCst);
+
+        // Reset a range spanning both words
+        bitmap.reset_addr_range(63 * 128, 256); // Reset bits 63 and 64
+
+        // Check bits are reset
+        assert!(!bitmap.is_bit_set(63));
+        assert!(!bitmap.is_bit_set(64));
+        // Check adjacent bits are still set
+        assert!(bitmap.is_bit_set(62));
+        assert!(bitmap.is_bit_set(65));
+    }
+
+    #[test]
+    fn test_reset_addr_range_zero_length() {
+        let page_size = NonZeroUsize::new(128).unwrap();
+        let (memory, bitmap) = setup_test_bitmap(1024, page_size);
+
+        // Set a bit manually
+        memory[0].store(1, Ordering::SeqCst);
+
+        // Reset with length 0
+        bitmap.reset_addr_range(0, 0);
+
+        // Bit should still be set
+        assert!(bitmap.is_bit_set(0));
+    }
+
+    #[test]
+    fn test_reset_addr_range_beyond_bounds() {
+        let page_size = NonZeroUsize::new(128).unwrap();
+        let (_memory, bitmap) = setup_test_bitmap(1024, page_size);
+
+        // This should not panic
+        bitmap.reset_addr_range(1024, 2048);
+    }
+
+    #[test]
+    fn test_edge_cases() {
+        // Test with minimum page size
+        let page_size = NonZeroUsize::new(1).unwrap();
+        let (_memory, bitmap) = setup_test_bitmap(64, page_size);
+        assert!(!bitmap.is_bit_set(0));
+
+        // Test with zero byte_size
+        let page_size = NonZeroUsize::new(128).unwrap();
+        let (_memory, bitmap) = setup_test_bitmap(0, page_size);
+        assert!(!bitmap.is_bit_set(0));
+
+        // Test reset_addr_range with maximum usize value
+        bitmap.reset_addr_range(usize::MAX - 128, 256);
+    }
+}
diff --git a/src/firecracker/src/api_server/request/machine_configuration.rs b/src/firecracker/src/api_server/request/machine_configuration.rs
index 2e8addffb74..0edb79f3774 100644
--- a/src/firecracker/src/api_server/request/machine_configuration.rs
+++ b/src/firecracker/src/api_server/request/machine_configuration.rs
@@ -119,6 +119,7 @@ mod tests {
             let expected_config = MachineConfigUpdate {
                 vcpu_count: Some(8),
                 mem_size_mib: Some(1024),
+                secret_free: Some(false),
                 smt: Some(false),
                 cpu_template: None,
                 track_dirty_pages: Some(false),
@@ -140,6 +141,7 @@ mod tests {
         let expected_config = MachineConfigUpdate {
             vcpu_count: Some(8),
             mem_size_mib: Some(1024),
+            secret_free: Some(false),
             smt: Some(false),
             cpu_template: Some(StaticCpuTemplate::None),
             track_dirty_pages: Some(false),
@@ -161,6 +163,7 @@ mod tests {
         let expected_config = MachineConfigUpdate {
             vcpu_count: Some(8),
             mem_size_mib: Some(1024),
+            secret_free: Some(false),
             smt: Some(false),
             cpu_template: None,
             track_dirty_pages: Some(true),
@@ -186,6 +189,7 @@ mod tests {
             let expected_config = MachineConfigUpdate {
                 vcpu_count: Some(8),
                 mem_size_mib: Some(1024),
+                secret_free: Some(false),
                 smt: Some(false),
                 cpu_template: Some(StaticCpuTemplate::T2),
                 track_dirty_pages: Some(true),
@@ -213,6 +217,7 @@ mod tests {
         let expected_config = MachineConfigUpdate {
             vcpu_count: Some(8),
             mem_size_mib: Some(1024),
+            secret_free: Some(false),
             smt: Some(true),
             cpu_template: None,
             track_dirty_pages: Some(true),
diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml
index 598db98229e..d97a9364bdc 100644
--- a/src/firecracker/swagger/firecracker.yaml
+++ b/src/firecracker/swagger/firecracker.yaml
@@ -1065,6 +1065,11 @@ definitions:
       mem_size_mib:
         type: integer
         description: Memory size of VM
+      secret_free:
+        type: boolean
+        description:
+          If enabled, guest memory will be unmapped from the host kernel's address space, providing additional
+          protection against transitive execution issues. All I/O then goes through a bounce buffer.
       track_dirty_pages:
         type: boolean
         description:
diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml
index 6aada3d9026..5d67d04b9a9 100644
--- a/src/vmm/Cargo.toml
+++ b/src/vmm/Cargo.toml
@@ -48,7 +48,7 @@ serde_json = "1.0.143"
 slab = "0.4.11"
 thiserror = "2.0.16"
 timerfd = "1.5.0"
-userfaultfd = "0.9.0"
+userfaultfd = { version = "0.9.0", features = ["linux5_13"] }
 utils = { path = "../utils" }
 uuid = "1.18.1"
 vhost = { version = "0.14.0", features = ["vhost-user-frontend"] }
diff --git a/src/vmm/benches/memory_access.rs b/src/vmm/benches/memory_access.rs
index a272aceceaa..9aac5633118 100644
--- a/src/vmm/benches/memory_access.rs
+++ b/src/vmm/benches/memory_access.rs
@@ -11,7 +11,7 @@ fn bench_single_page_fault(c: &mut Criterion, configuration: VmResources) {
     c.bench_function("page_fault", |b| {
         b.iter_batched(
             || {
-                let memory = configuration.allocate_guest_memory().unwrap();
+                let memory = configuration.allocate_guest_memory(None).unwrap();
                 // Get a pointer to the first memory region (cannot do `.get_slice(GuestAddress(0),
                 // 1)`, because on ARM64 guest memory does not start at physical
                 // address 0).
diff --git a/src/vmm/src/arch/aarch64/fdt.rs b/src/vmm/src/arch/aarch64/fdt.rs
index 9946d3516cc..d7856190022 100644
--- a/src/vmm/src/arch/aarch64/fdt.rs
+++ b/src/vmm/src/arch/aarch64/fdt.rs
@@ -555,7 +555,7 @@ mod tests {
         let mut event_manager = EventManager::new().unwrap();
         let mut device_manager = default_device_manager();
         let kvm = Kvm::new(vec![]).unwrap();
-        let vm = Vm::new(&kvm).unwrap();
+        let vm = Vm::new(&kvm, false).unwrap();
         let gic = create_gic(vm.fd(), 1, None).unwrap();
         let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap();
         cmdline.insert("console", "/dev/tty0").unwrap();
@@ -585,7 +585,7 @@ mod tests {
         let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000);
         let mut device_manager = default_device_manager();
         let kvm = Kvm::new(vec![]).unwrap();
-        let vm = Vm::new(&kvm).unwrap();
+        let vm = Vm::new(&kvm, false).unwrap();
         let gic = create_gic(vm.fd(), 1, None).unwrap();
         let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap();
         cmdline.insert("console", "/dev/tty0").unwrap();
@@ -608,7 +608,7 @@ mod tests {
         let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000);
         let device_manager = default_device_manager();
         let kvm = Kvm::new(vec![]).unwrap();
-        let vm = Vm::new(&kvm).unwrap();
+        let vm = Vm::new(&kvm, false).unwrap();
         let gic = create_gic(vm.fd(), 1, None).unwrap();
 
         let saved_dtb_bytes = match gic.fdt_compatibility() {
@@ -665,7 +665,7 @@ mod tests {
         let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000);
         let device_manager = default_device_manager();
         let kvm = Kvm::new(vec![]).unwrap();
-        let vm = Vm::new(&kvm).unwrap();
+        let vm = Vm::new(&kvm, false).unwrap();
         let gic = create_gic(vm.fd(), 1, None).unwrap();
 
         let saved_dtb_bytes = match gic.fdt_compatibility() {
diff --git a/src/vmm/src/arch/aarch64/mod.rs b/src/vmm/src/arch/aarch64/mod.rs
index 74c5204af0e..d7e1deb0363 100644
--- a/src/vmm/src/arch/aarch64/mod.rs
+++ b/src/vmm/src/arch/aarch64/mod.rs
@@ -18,11 +18,11 @@ pub mod vm;
 
 use std::cmp::min;
 use std::fmt::Debug;
-use std::fs::File;
+use std::io::{Read, Seek};
 
 use linux_loader::loader::pe::PE as Loader;
 use linux_loader::loader::{Cmdline, KernelLoader};
-use vm_memory::GuestMemoryError;
+use vm_memory::{GuestMemoryError, ReadVolatile};
 
 use crate::arch::{BootProtocol, EntryPoint, arch_memory_regions_with_gap};
 use crate::cpu_config::aarch64::{CpuConfiguration, CpuConfigurationError};
@@ -179,16 +179,10 @@ fn get_fdt_addr(mem: &GuestMemoryMmap) -> u64 {
 }
 
 /// Load linux kernel into guest memory.
-pub fn load_kernel(
-    kernel: &File,
+pub fn load_kernel<R: ReadVolatile + Read + Seek>(
+    mut kernel_file: R,
     guest_memory: &GuestMemoryMmap,
 ) -> Result<EntryPoint, ConfigurationError> {
-    // Need to clone the File because reading from it
-    // mutates it.
-    let mut kernel_file = kernel
-        .try_clone()
-        .map_err(|_| ConfigurationError::KernelFile)?;
-
     let entry_addr = Loader::load(
         guest_memory,
         Some(GuestAddress(get_kernel_start())),
diff --git a/src/vmm/src/arch/aarch64/vm.rs b/src/vmm/src/arch/aarch64/vm.rs
index eaec0932a42..f1d4b845277 100644
--- a/src/vmm/src/arch/aarch64/vm.rs
+++ b/src/vmm/src/arch/aarch64/vm.rs
@@ -33,8 +33,8 @@ pub enum ArchVmError {
 
 impl ArchVm {
     /// Create a new `Vm` struct.
-    pub fn new(kvm: &Kvm) -> Result<ArchVm, VmError> {
-        let common = Self::create_common(kvm)?;
+    pub fn new(kvm: &Kvm, secret_free: bool) -> Result<ArchVm, VmError> {
+        let common = Self::create_common(kvm, secret_free)?;
         Ok(ArchVm {
             common,
             irqchip_handle: None,
diff --git a/src/vmm/src/arch/x86_64/mod.rs b/src/vmm/src/arch/x86_64/mod.rs
index b18267c6a1e..16c9adbbf86 100644
--- a/src/vmm/src/arch/x86_64/mod.rs
+++ b/src/vmm/src/arch/x86_64/mod.rs
@@ -31,7 +31,7 @@ pub mod xstate;
 #[allow(missing_docs)]
 pub mod generated;
 
-use std::fs::File;
+use std::io::{Read, Seek};
 
 use kvm::Kvm;
 use layout::{
@@ -48,6 +48,7 @@ use linux_loader::loader::elf::start_info::{
 };
 use linux_loader::loader::{Cmdline, KernelLoader, PvhBootCapability, load_cmdline};
 use log::debug;
+use vm_memory::ReadVolatile;
 
 use super::EntryPoint;
 use crate::acpi::create_acpi_tables;
@@ -466,20 +467,14 @@ fn add_e820_entry(
 }
 
 /// Load linux kernel into guest memory.
-pub fn load_kernel(
-    kernel: &File,
+pub fn load_kernel<R: Read + ReadVolatile + Seek>(
+    mut kernel: R,
     guest_memory: &GuestMemoryMmap,
 ) -> Result<EntryPoint, ConfigurationError> {
-    // Need to clone the File because reading from it
-    // mutates it.
-    let mut kernel_file = kernel
-        .try_clone()
-        .map_err(|_| ConfigurationError::KernelFile)?;
-
     let entry_addr = Loader::load(
         guest_memory,
         None,
-        &mut kernel_file,
+        &mut kernel,
         Some(GuestAddress(get_kernel_start())),
     )
     .map_err(ConfigurationError::KernelLoader)?;
diff --git a/src/vmm/src/arch/x86_64/vm.rs b/src/vmm/src/arch/x86_64/vm.rs
index b71d18ae37b..739a7e04d0e 100644
--- a/src/vmm/src/arch/x86_64/vm.rs
+++ b/src/vmm/src/arch/x86_64/vm.rs
@@ -65,8 +65,8 @@ pub struct ArchVm {
 
 impl ArchVm {
     /// Create a new `Vm` struct.
-    pub fn new(kvm: &crate::vstate::kvm::Kvm) -> Result<ArchVm, VmError> {
-        let common = Self::create_common(kvm)?;
+    pub fn new(kvm: &crate::vstate::kvm::Kvm, secret_free: bool) -> Result<ArchVm, VmError> {
+        let common = Self::create_common(kvm, secret_free)?;
 
         let msrs_to_save = kvm.msrs_to_save().map_err(ArchVmError::GetMsrsToSave)?;
 
diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs
index dbfe4232381..1b6a6f6c886 100644
--- a/src/vmm/src/builder.rs
+++ b/src/vmm/src/builder.rs
@@ -4,27 +4,33 @@
 //! Enables pre-boot setup, instantiation and booting of a Firecracker VMM.
 
 use std::fmt::Debug;
-use std::io;
+use std::fs::File;
+use std::io::{self};
+use std::os::fd::{AsFd, AsRawFd};
+use std::os::unix::fs::MetadataExt;
 #[cfg(feature = "gdb")]
 use std::sync::mpsc;
 use std::sync::{Arc, Mutex};
 
 use event_manager::SubscriberOps;
+use kvm_ioctls::Cap;
 use linux_loader::cmdline::Cmdline as LoaderKernelCmdline;
-use userfaultfd::Uffd;
 use utils::time::TimestampUs;
 #[cfg(target_arch = "aarch64")]
 use vm_memory::GuestAddress;
 
 #[cfg(target_arch = "aarch64")]
 use crate::Vcpu;
-use crate::arch::{ConfigurationError, configure_system_for_boot, load_kernel};
+use crate::arch::{ConfigurationError, configure_system_for_boot, host_page_size, load_kernel};
 #[cfg(target_arch = "aarch64")]
 use crate::construct_kvm_mpidrs;
-use crate::cpu_config::templates::{GetCpuTemplate, GetCpuTemplateError, GuestConfigError};
+use crate::cpu_config::templates::{
+    GetCpuTemplate, GetCpuTemplateError, GuestConfigError, KvmCapability,
+};
 #[cfg(target_arch = "x86_64")]
 use crate::device_manager;
 use crate::device_manager::pci_mngr::PciManagerError;
+use crate::device_manager::persist::ACPIDeviceManagerRestoreError;
 use crate::device_manager::{
     AttachDeviceError, DeviceManager, DeviceManagerCreateError, DevicePersistError,
     DeviceRestoreArgs,
@@ -39,18 +45,23 @@ use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend};
 use crate::gdb;
 use crate::initrd::{InitrdConfig, InitrdError};
 use crate::logger::debug;
-use crate::persist::{MicrovmState, MicrovmStateError};
+use crate::persist::{
+    GuestMemoryFromFileError, GuestMemoryFromUffdError, MicrovmState, MicrovmStateError,
+    guest_memory_from_file, guest_memory_from_uffd,
+};
 use crate::resources::VmResources;
 use crate::seccomp::BpfThreadMap;
 use crate::snapshot::Persist;
+use crate::utils::u64_to_usize;
 use crate::vmm_config::instance_info::InstanceInfo;
 use crate::vmm_config::machine_config::MachineConfigError;
+use crate::vmm_config::snapshot::{LoadSnapshotParams, MemBackendType};
 use crate::vstate::kvm::{Kvm, KvmError};
-use crate::vstate::memory::GuestRegionMmap;
+use crate::vstate::memory::{MaybeBounce, MemoryError, create_memfd};
 #[cfg(target_arch = "aarch64")]
 use crate::vstate::resources::ResourceAllocator;
 use crate::vstate::vcpu::VcpuError;
-use crate::vstate::vm::{Vm, VmError};
+use crate::vstate::vm::{GUEST_MEMFD_FLAG_MMAP, GUEST_MEMFD_FLAG_NO_DIRECT_MAP, Vm, VmError};
 use crate::{EventManager, Vmm, VmmError};
 
 /// Errors associated with starting the instance.
@@ -130,6 +141,9 @@ impl std::convert::From<linux_loader::cmdline::Error> for StartMicrovmError {
     }
 }
 
+const KVM_CAP_GUEST_MEMFD_MMAP: u32 = 243;
+const KVM_CAP_GUEST_MEMFD_NO_DIRECT_MAP: u32 = 244;
+
 /// Builds and starts a microVM based on the current Firecracker VmResources configuration.
 ///
 /// The built microVM and all the created vCPUs start off in the paused state.
@@ -150,10 +164,6 @@ pub fn build_microvm_for_boot(
         .as_ref()
         .ok_or(StartMicrovmError::MissingKernelConfig)?;
 
-    let guest_memory = vm_resources
-        .allocate_guest_memory()
-        .map_err(StartMicrovmError::GuestMemory)?;
-
     // Clone the command-line so that a failed boot doesn't pollute the original.
     #[allow(unused_mut)]
     let mut boot_cmdline = boot_config.cmdline.clone();
@@ -163,12 +173,40 @@ pub fn build_microvm_for_boot(
         .cpu_template
         .get_cpu_template()?;
 
-    let kvm = Kvm::new(cpu_template.kvm_capabilities.clone())?;
+    let secret_free = vm_resources.machine_config.secret_free;
+
+    let mut kvm_capabilities = cpu_template.kvm_capabilities.clone();
+
+    if secret_free {
+        kvm_capabilities.push(KvmCapability::Add(Cap::GuestMemfd as u32));
+        kvm_capabilities.push(KvmCapability::Add(KVM_CAP_GUEST_MEMFD_MMAP));
+        kvm_capabilities.push(KvmCapability::Add(KVM_CAP_GUEST_MEMFD_NO_DIRECT_MAP));
+    }
+
+    let kvm = Kvm::new(kvm_capabilities)?;
     // Set up Kvm Vm and register memory regions.
     // Build custom CPU config if a custom template is provided.
-    let mut vm = Vm::new(&kvm)?;
-    let (mut vcpus, vcpus_exit_evt) = vm.create_vcpus(vm_resources.machine_config.vcpu_count)?;
-    vm.register_memory_regions(guest_memory)?;
+    let mut vm = Vm::new(&kvm, secret_free)?;
+    let (mut vcpus, vcpus_exit_evt) =
+        vm.create_vcpus(vm_resources.machine_config.vcpu_count, secret_free)?;
+
+    let guest_memfd = match secret_free {
+        true => Some(
+            vm.create_guest_memfd(
+                vm_resources.memory_size(),
+                GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_NO_DIRECT_MAP,
+            )
+            .map_err(VmmError::Vm)?,
+        ),
+        false => None,
+    };
+
+    let guest_memory = vm_resources
+        .allocate_guest_memory(guest_memfd)
+        .map_err(StartMicrovmError::GuestMemory)?;
+
+    vm.register_memory_regions(guest_memory, None)
+        .map_err(VmmError::Vm)?;
 
     let mut device_manager = DeviceManager::new(
         event_manager,
@@ -179,8 +217,28 @@ pub fn build_microvm_for_boot(
 
     let vm = Arc::new(vm);
 
-    let entry_point = load_kernel(&boot_config.kernel_file, vm.guest_memory())?;
-    let initrd = InitrdConfig::from_config(boot_config, vm.guest_memory())?;
+    let entry_point = load_kernel(
+        MaybeBounce::<_, 4096>::new_persistent(
+            boot_config.kernel_file.try_clone().unwrap(),
+            secret_free,
+        ),
+        vm.guest_memory(),
+    )?;
+    let initrd = match &boot_config.initrd_file {
+        Some(initrd_file) => {
+            let size = initrd_file
+                .metadata()
+                .map_err(InitrdError::Metadata)?
+                .size();
+
+            Some(InitrdConfig::from_reader(
+                vm.guest_memory(),
+                MaybeBounce::<_, 4096>::new_persistent(initrd_file.as_fd(), secret_free),
+                u64_to_usize(size),
+            )?)
+        }
+        None => None,
+    };
 
     #[cfg(feature = "gdb")]
     let (gdb_tx, gdb_rx) = mpsc::channel();
@@ -214,6 +272,7 @@ pub fn build_microvm_for_boot(
             &mut boot_cmdline,
             balloon,
             event_manager,
+            vm_resources.machine_config.secret_free,
         )?;
     }
 
@@ -223,6 +282,7 @@ pub fn build_microvm_for_boot(
         &mut boot_cmdline,
         vm_resources.block.devices.iter(),
         event_manager,
+        vm_resources.machine_config.secret_free,
     )?;
     attach_net_devices(
         &mut device_manager,
@@ -230,6 +290,7 @@ pub fn build_microvm_for_boot(
         &mut boot_cmdline,
         vm_resources.net_builder.iter(),
         event_manager,
+        vm_resources.machine_config.secret_free,
     )?;
 
     if let Some(unix_vsock) = vm_resources.vsock.get() {
@@ -239,6 +300,7 @@ pub fn build_microvm_for_boot(
             &mut boot_cmdline,
             unix_vsock,
             event_manager,
+            vm_resources.machine_config.secret_free,
         )?;
     }
 
@@ -249,6 +311,7 @@ pub fn build_microvm_for_boot(
             &mut boot_cmdline,
             entropy,
             event_manager,
+            vm_resources.machine_config.secret_free,
         )?;
     }
 
@@ -287,6 +350,7 @@ pub fn build_microvm_for_boot(
         kvm,
         vm,
         uffd: None,
+        uffd_socket: None,
         vcpus_handles: Vec::new(),
         vcpus_exit_evt,
         device_manager,
@@ -359,6 +423,17 @@ pub fn build_and_boot_microvm(
     Ok(vmm)
 }
 
+/// Sub-Error type for [`build_microvm_from_snapshot`] to contain either
+/// [`GuestMemoryFromFileError`] or [`GuestMemoryFromUffdError`] within
+/// [`BuildMicrovmFromSnapshotError`].
+#[derive(Debug, thiserror::Error, displaydoc::Display)]
+pub enum BuildMicrovmFromSnapshotErrorGuestMemoryError {
+    /// Error creating guest memory from file: {0}
+    File(#[from] GuestMemoryFromFileError),
+    /// Error creating guest memory from uffd: {0}
+    Uffd(#[from] GuestMemoryFromUffdError),
+}
+
 /// Error type for [`build_microvm_from_snapshot`].
 #[derive(Debug, thiserror::Error, displaydoc::Display)]
 pub enum BuildMicrovmFromSnapshotError {
@@ -394,7 +469,53 @@ pub enum BuildMicrovmFromSnapshotError {
     SeccompFiltersInternal(#[from] crate::seccomp::InstallationError),
     /// Failed to restore devices: {0}
     RestoreDevices(#[from] DevicePersistError),
+    /// Failed to restore ACPI device manager: {0}
+    ACPIDeviManager(#[from] ACPIDeviceManagerRestoreError),
+    /// VMGenID update failed: {0}
+    VMGenIDUpdate(std::io::Error),
+    /// Internal error while restoring microVM: {0}
+    Internal(#[from] VmmError),
+    /// Failed to load guest memory: {0}
+    GuestMemory(#[from] BuildMicrovmFromSnapshotErrorGuestMemoryError),
+    /// Userfault bitmap memfd error: {0}
+    UserfaultBitmapMemfd(#[from] MemoryError),
+}
+
+fn memfd_to_slice(memfd: &mut Option<File>) -> Result<Option<&mut [u8]>, MemoryError> {
+    if let Some(bitmap_file) = memfd {
+        let len = u64_to_usize(
+            bitmap_file
+                .metadata()
+                .expect("Failed to get metadata")
+                .len(),
+        );
+
+        // SAFETY: the arguments to mmap cannot cause any memory unsafety in the rust sense
+        let bitmap_addr = unsafe {
+            libc::mmap(
+                std::ptr::null_mut(),
+                len,
+                libc::PROT_WRITE,
+                libc::MAP_SHARED,
+                bitmap_file.as_raw_fd(),
+                0,
+            )
+        };
+
+        if bitmap_addr == libc::MAP_FAILED {
+            return Err(MemoryError::Mmap(std::io::Error::last_os_error()));
+        }
+
+        // SAFETY: `bitmap_addr` is a valid memory address returned by `mmap`.
+        Ok(Some(unsafe {
+            std::slice::from_raw_parts_mut(bitmap_addr.cast(), len)
+        }))
+    } else {
+        Ok(None)
+    }
 }
+// TODO: take it from kvm-bindings when userfault support is merged upstream
+const KVM_CAP_USERFAULT: u32 = 245;
 
 /// Builds and starts a microVM based on the provided MicrovmState.
 ///
@@ -405,25 +526,96 @@ pub fn build_microvm_from_snapshot(
     instance_info: &InstanceInfo,
     event_manager: &mut EventManager,
     microvm_state: MicrovmState,
-    guest_memory: Vec<GuestRegionMmap>,
-    uffd: Option<Uffd>,
     seccomp_filters: &BpfThreadMap,
+    params: &LoadSnapshotParams,
     vm_resources: &mut VmResources,
 ) -> Result<Arc<Mutex<Vmm>>, BuildMicrovmFromSnapshotError> {
     // Build Vmm.
     debug!("event_start: build microvm from snapshot");
 
-    let kvm = Kvm::new(microvm_state.kvm_state.kvm_cap_modifiers.clone())
-        .map_err(StartMicrovmError::Kvm)?;
+    let secret_free = vm_resources.machine_config.secret_free;
+    let mut kvm_capabilities = microvm_state.kvm_state.kvm_cap_modifiers.clone();
+    if secret_free {
+        kvm_capabilities.push(KvmCapability::Add(Cap::GuestMemfd as u32));
+        kvm_capabilities.push(KvmCapability::Add(KVM_CAP_GUEST_MEMFD_MMAP));
+        kvm_capabilities.push(KvmCapability::Add(KVM_CAP_GUEST_MEMFD_NO_DIRECT_MAP));
+        kvm_capabilities.push(KvmCapability::Add(KVM_CAP_USERFAULT));
+    }
+
+    let kvm = Kvm::new(kvm_capabilities).map_err(StartMicrovmError::Kvm)?;
     // Set up Kvm Vm and register memory regions.
     // Build custom CPU config if a custom template is provided.
-    let mut vm = Vm::new(&kvm).map_err(StartMicrovmError::Vm)?;
+    let mut vm = Vm::new(&kvm, secret_free).map_err(StartMicrovmError::Vm)?;
 
     let (mut vcpus, vcpus_exit_evt) = vm
-        .create_vcpus(vm_resources.machine_config.vcpu_count)
+        .create_vcpus(vm_resources.machine_config.vcpu_count, secret_free)
         .map_err(StartMicrovmError::Vm)?;
 
-    vm.register_memory_regions(guest_memory)
+    let guest_memfd = match secret_free {
+        true => Some(
+            vm.create_guest_memfd(
+                vm_resources.memory_size(),
+                GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_NO_DIRECT_MAP,
+            )
+            .map_err(VmmError::Vm)?,
+        ),
+        false => None,
+    };
+
+    let mut userfault_bitmap_memfd = if secret_free {
+        let bitmap_size = vm_resources.memory_size() / host_page_size() / u8::BITS as usize;
+        let bitmap_file = create_memfd(bitmap_size as u64, None)?;
+
+        Some(bitmap_file.into_file())
+    } else {
+        None
+    };
+
+    let mem_backend_path = &params.mem_backend.backend_path;
+    let mem_state = &microvm_state.vm_state.memory;
+    let track_dirty_pages = params.track_dirty_pages;
+
+    let (guest_memory, uffd, uffd_socket) = match params.mem_backend.backend_type {
+        MemBackendType::File => {
+            if vm_resources.machine_config.huge_pages.is_hugetlbfs() {
+                return Err(BuildMicrovmFromSnapshotErrorGuestMemoryError::File(
+                    GuestMemoryFromFileError::HugetlbfsSnapshot,
+                )
+                .into());
+            }
+            (
+                guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages)
+                    .map_err(BuildMicrovmFromSnapshotErrorGuestMemoryError::File)?,
+                None,
+                None,
+            )
+        }
+        MemBackendType::Uffd => {
+            if vm_resources.machine_config.huge_pages.is_hugetlbfs() && guest_memfd.is_some() {
+                return Err(BuildMicrovmFromSnapshotErrorGuestMemoryError::Uffd(
+                    GuestMemoryFromUffdError::HugetlbfsSnapshot,
+                )
+                .into());
+            }
+            guest_memory_from_uffd(
+                mem_backend_path,
+                mem_state,
+                track_dirty_pages,
+                vm_resources.machine_config.huge_pages,
+                guest_memfd,
+                userfault_bitmap_memfd.as_ref(),
+            )
+            .map_err(BuildMicrovmFromSnapshotErrorGuestMemoryError::Uffd)?
+        }
+    };
+
+    let mut userfault_bitmap = memfd_to_slice(&mut userfault_bitmap_memfd)?;
+    if let Some(ref mut slice) = userfault_bitmap {
+        // Set all bits so a fault on any page will cause a VM exit
+        slice.fill(0xffu8);
+    }
+
+    vm.register_memory_regions(guest_memory, userfault_bitmap)
         .map_err(StartMicrovmError::Vm)?;
 
     #[cfg(target_arch = "x86_64")]
@@ -487,6 +679,7 @@ pub fn build_microvm_from_snapshot(
         kvm,
         vm,
         uffd,
+        uffd_socket,
         vcpus_handles: Vec::new(),
         vcpus_exit_evt,
         device_manager,
@@ -565,6 +758,7 @@ fn attach_entropy_device(
     cmdline: &mut LoaderKernelCmdline,
     entropy_device: &Arc<Mutex<Entropy>>,
     event_manager: &mut EventManager,
+    secret_free: bool,
 ) -> Result<(), AttachDeviceError> {
     let id = entropy_device
         .lock()
@@ -573,7 +767,7 @@ fn attach_entropy_device(
         .to_string();
 
     event_manager.add_subscriber(entropy_device.clone());
-    device_manager.attach_virtio_device(vm, id, entropy_device.clone(), cmdline, false)
+    device_manager.attach_virtio_device(vm, id, entropy_device.clone(), cmdline, false, secret_free)
 }
 
 fn attach_block_devices<'a, I: Iterator<Item = &'a Arc<Mutex<Block>>> + Debug>(
@@ -582,6 +776,7 @@ fn attach_block_devices<'a, I: Iterator<Item = &'a Arc<Mutex<Block>>> + Debug>(
     cmdline: &mut LoaderKernelCmdline,
     blocks: I,
     event_manager: &mut EventManager,
+    secret_free: bool,
 ) -> Result<(), StartMicrovmError> {
     for block in blocks {
         let (id, is_vhost_user) = {
@@ -600,7 +795,14 @@ fn attach_block_devices<'a, I: Iterator<Item = &'a Arc<Mutex<Block>>> + Debug>(
         };
         // The device mutex mustn't be locked here otherwise it will deadlock.
         event_manager.add_subscriber(block.clone());
-        device_manager.attach_virtio_device(vm, id, block.clone(), cmdline, is_vhost_user)?;
+        device_manager.attach_virtio_device(
+            vm,
+            id,
+            block.clone(),
+            cmdline,
+            is_vhost_user,
+            secret_free,
+        )?;
     }
     Ok(())
 }
@@ -611,12 +813,20 @@ fn attach_net_devices<'a, I: Iterator<Item = &'a Arc<Mutex<Net>>> + Debug>(
     cmdline: &mut LoaderKernelCmdline,
     net_devices: I,
     event_manager: &mut EventManager,
+    secret_free: bool,
 ) -> Result<(), StartMicrovmError> {
     for net_device in net_devices {
         let id = net_device.lock().expect("Poisoned lock").id().clone();
         event_manager.add_subscriber(net_device.clone());
         // The device mutex mustn't be locked here otherwise it will deadlock.
-        device_manager.attach_virtio_device(vm, id, net_device.clone(), cmdline, false)?;
+        device_manager.attach_virtio_device(
+            vm,
+            id,
+            net_device.clone(),
+            cmdline,
+            false,
+            secret_free,
+        )?;
     }
     Ok(())
 }
@@ -627,11 +837,12 @@ fn attach_unixsock_vsock_device(
     cmdline: &mut LoaderKernelCmdline,
     unix_vsock: &Arc<Mutex<Vsock<VsockUnixBackend>>>,
     event_manager: &mut EventManager,
+    secret_free: bool,
 ) -> Result<(), AttachDeviceError> {
     let id = String::from(unix_vsock.lock().expect("Poisoned lock").id());
     event_manager.add_subscriber(unix_vsock.clone());
     // The device mutex mustn't be locked here otherwise it will deadlock.
-    device_manager.attach_virtio_device(vm, id, unix_vsock.clone(), cmdline, false)
+    device_manager.attach_virtio_device(vm, id, unix_vsock.clone(), cmdline, false, secret_free)
 }
 
 fn attach_balloon_device(
@@ -640,11 +851,12 @@ fn attach_balloon_device(
     cmdline: &mut LoaderKernelCmdline,
     balloon: &Arc<Mutex<Balloon>>,
     event_manager: &mut EventManager,
+    secret_free: bool,
 ) -> Result<(), AttachDeviceError> {
     let id = String::from(balloon.lock().expect("Poisoned lock").id());
     event_manager.add_subscriber(balloon.clone());
     // The device mutex mustn't be locked here otherwise it will deadlock.
-    device_manager.attach_virtio_device(vm, id, balloon.clone(), cmdline, false)
+    device_manager.attach_virtio_device(vm, id, balloon.clone(), cmdline, false, secret_free)
 }
 
 #[cfg(test)]
@@ -727,7 +939,7 @@ pub(crate) mod tests {
     pub(crate) fn default_vmm() -> Vmm {
         let (kvm, mut vm) = setup_vm_with_memory(mib_to_bytes(128));
 
-        let (_, vcpus_exit_evt) = vm.create_vcpus(1).unwrap();
+        let (_, vcpus_exit_evt) = vm.create_vcpus(1, false).unwrap();
 
         Vmm {
             instance_info: InstanceInfo::default(),
@@ -735,6 +947,7 @@ pub(crate) mod tests {
             kvm,
             vm: Arc::new(vm),
             uffd: None,
+            uffd_socket: None,
             vcpus_handles: Vec::new(),
             vcpus_exit_evt,
             device_manager: default_device_manager(),
@@ -783,6 +996,7 @@ pub(crate) mod tests {
             cmdline,
             block_dev_configs.devices.iter(),
             event_manager,
+            false,
         )
         .unwrap();
         block_files
@@ -803,6 +1017,7 @@ pub(crate) mod tests {
             cmdline,
             net_builder.iter(),
             event_manager,
+            false,
         );
         res.unwrap();
     }
@@ -830,6 +1045,7 @@ pub(crate) mod tests {
             cmdline,
             net_builder.iter(),
             event_manager,
+            false,
         )
         .unwrap();
     }
@@ -850,6 +1066,7 @@ pub(crate) mod tests {
             cmdline,
             &vsock,
             event_manager,
+            false,
         )
         .unwrap();
 
@@ -875,6 +1092,7 @@ pub(crate) mod tests {
             cmdline,
             &entropy,
             event_manager,
+            false,
         )
         .unwrap();
 
@@ -909,6 +1127,7 @@ pub(crate) mod tests {
             cmdline,
             balloon,
             event_manager,
+            false,
         )
         .unwrap();
 
diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs
index 46accb637b0..66dd3cdae5b 100644
--- a/src/vmm/src/device_manager/mmio.rs
+++ b/src/vmm/src/device_manager/mmio.rs
@@ -534,6 +534,14 @@ pub(crate) mod tests {
 
         fn set_acked_features(&mut self, _: u64) {}
 
+        fn force_userspace_bounce_buffers(&mut self) {
+            todo!()
+        }
+
+        fn userspace_bounce_buffers(&self) -> bool {
+            todo!()
+        }
+
         fn queues(&self) -> &[Queue] {
             &self.queues
         }
@@ -585,8 +593,8 @@ pub(crate) mod tests {
         let start_addr2 = GuestAddress(0x1000);
         let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
         let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
-        let mut vm = Vm::new(&kvm).unwrap();
-        vm.register_memory_regions(guest_mem).unwrap();
+        let mut vm = Vm::new(&kvm, false).unwrap();
+        vm.register_memory_regions(guest_mem, None).unwrap();
         let mut device_manager = MMIODeviceManager::new();
 
         let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap();
@@ -631,8 +639,8 @@ pub(crate) mod tests {
         let start_addr2 = GuestAddress(0x1000);
         let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
         let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
-        let mut vm = Vm::new(&kvm).unwrap();
-        vm.register_memory_regions(guest_mem).unwrap();
+        let mut vm = Vm::new(&kvm, false).unwrap();
+        vm.register_memory_regions(guest_mem, None).unwrap();
         let mut device_manager = MMIODeviceManager::new();
 
         let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap();
@@ -684,8 +692,8 @@ pub(crate) mod tests {
         let start_addr2 = GuestAddress(0x1000);
         let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
         let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
-        let mut vm = Vm::new(&kvm).unwrap();
-        vm.register_memory_regions(guest_mem).unwrap();
+        let mut vm = Vm::new(&kvm, false).unwrap();
+        vm.register_memory_regions(guest_mem, None).unwrap();
 
         #[cfg(target_arch = "x86_64")]
         vm.setup_irqchip().unwrap();
diff --git a/src/vmm/src/device_manager/mod.rs b/src/vmm/src/device_manager/mod.rs
index d7052422a3a..6e5e76b1e76 100644
--- a/src/vmm/src/device_manager/mod.rs
+++ b/src/vmm/src/device_manager/mod.rs
@@ -220,7 +220,12 @@ impl DeviceManager {
         device: Arc<Mutex<T>>,
         cmdline: &mut Cmdline,
         is_vhost_user: bool,
+        secret_free: bool,
     ) -> Result<(), AttachDeviceError> {
+        if secret_free {
+            device.lock().unwrap().force_userspace_bounce_buffers()
+        }
+
         if self.pci_devices.pci_segment.is_some() {
             self.pci_devices.attach_pci_virtio_device(vm, id, device)?;
         } else {
diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs
index f1ec39ab1d5..2ba3154fddb 100644
--- a/src/vmm/src/device_manager/pci_mngr.rs
+++ b/src/vmm/src/device_manager/pci_mngr.rs
@@ -704,6 +704,7 @@ mod tests {
   "machine-config": {{
     "vcpu_count": 1,
     "mem_size_mib": 128,
+    "secret_free": false,
     "smt": false,
     "track_dirty_pages": false,
     "huge_pages": "None"
diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs
index d6d46fff0f5..4b6560fbf23 100644
--- a/src/vmm/src/device_manager/persist.rs
+++ b/src/vmm/src/device_manager/persist.rs
@@ -721,6 +721,7 @@ mod tests {
   "machine-config": {{
     "vcpu_count": 1,
     "mem_size_mib": 128,
+    "secret_free": false,
     "smt": false,
     "track_dirty_pages": false,
     "huge_pages": "None"
diff --git a/src/vmm/src/devices/virtio/balloon/device.rs b/src/vmm/src/devices/virtio/balloon/device.rs
index 87a82c4fa9d..c8376bc87b9 100644
--- a/src/vmm/src/devices/virtio/balloon/device.rs
+++ b/src/vmm/src/devices/virtio/balloon/device.rs
@@ -558,6 +558,14 @@ impl VirtioDevice for Balloon {
         self.acked_features = acked_features;
     }
 
+    fn force_userspace_bounce_buffers(&mut self) {
+        // balloon device doesn't have a need for bounce buffers
+    }
+
+    fn userspace_bounce_buffers(&self) -> bool {
+        false
+    }
+
     fn queues(&self) -> &[Queue] {
         &self.queues
     }
diff --git a/src/vmm/src/devices/virtio/block/device.rs b/src/vmm/src/devices/virtio/block/device.rs
index 13155efb31d..1a939038440 100644
--- a/src/vmm/src/devices/virtio/block/device.rs
+++ b/src/vmm/src/devices/virtio/block/device.rs
@@ -156,6 +156,20 @@ impl VirtioDevice for Block {
         }
     }
 
+    fn force_userspace_bounce_buffers(&mut self) {
+        match self {
+            Block::Virtio(b) => b.force_userspace_bounce_buffers(),
+            Block::VhostUser(b) => b.force_userspace_bounce_buffers(),
+        }
+    }
+
+    fn userspace_bounce_buffers(&self) -> bool {
+        match self {
+            Block::Virtio(b) => b.userspace_bounce_buffers(),
+            Block::VhostUser(b) => b.userspace_bounce_buffers(),
+        }
+    }
+
     fn queues(&self) -> &[Queue] {
         match self {
             Self::Virtio(b) => &b.queues,
diff --git a/src/vmm/src/devices/virtio/block/vhost_user/device.rs b/src/vmm/src/devices/virtio/block/vhost_user/device.rs
index dd08b8de7c8..38071e658b4 100644
--- a/src/vmm/src/devices/virtio/block/vhost_user/device.rs
+++ b/src/vmm/src/devices/virtio/block/vhost_user/device.rs
@@ -302,6 +302,15 @@ impl<T: VhostUserHandleBackend + Send + 'static> VirtioDevice for VhostUserBlock
         self.acked_features = acked_features;
     }
 
+    fn force_userspace_bounce_buffers(&mut self) {
+        // Nothing Firecracker can do about this, the backend would need to do the bouncing
+        panic!("vhost-user-blk is incompatible with userspace bounce buffers")
+    }
+
+    fn userspace_bounce_buffers(&self) -> bool {
+        false
+    }
+
     fn queues(&self) -> &[Queue] {
         &self.queues
     }
diff --git a/src/vmm/src/devices/virtio/block/virtio/device.rs b/src/vmm/src/devices/virtio/block/virtio/device.rs
index ecdd8ee4f6d..4df0b87c8d4 100644
--- a/src/vmm/src/devices/virtio/block/virtio/device.rs
+++ b/src/vmm/src/devices/virtio/block/virtio/device.rs
@@ -597,6 +597,22 @@ impl VirtioDevice for VirtioBlock {
         self.acked_features = acked_features;
     }
 
+    fn force_userspace_bounce_buffers(&mut self) {
+        match self.disk.file_engine {
+            FileEngine::Async(_) => {
+                panic!("async engine is incompatible with userspace bounce buffers")
+            }
+            FileEngine::Sync(ref mut engine) => engine.start_bouncing(),
+        }
+    }
+
+    fn userspace_bounce_buffers(&self) -> bool {
+        match self.disk.file_engine {
+            FileEngine::Async(_) => false,
+            FileEngine::Sync(ref engine) => engine.is_bouncing(),
+        }
+    }
+
     fn queues(&self) -> &[Queue] {
         &self.queues
     }
diff --git a/src/vmm/src/devices/virtio/block/virtio/io/sync_io.rs b/src/vmm/src/devices/virtio/block/virtio/io/sync_io.rs
index eec3b3d8b8d..576a0a5b1f2 100644
--- a/src/vmm/src/devices/virtio/block/virtio/io/sync_io.rs
+++ b/src/vmm/src/devices/virtio/block/virtio/io/sync_io.rs
@@ -6,7 +6,7 @@ use std::io::{Seek, SeekFrom, Write};
 
 use vm_memory::{GuestMemoryError, ReadVolatile, WriteVolatile};
 
-use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap};
+use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap, MaybeBounce};
 
 #[derive(Debug, thiserror::Error, displaydoc::Display)]
 pub enum SyncIoError {
@@ -22,7 +22,12 @@ pub enum SyncIoError {
 
 #[derive(Debug)]
 pub struct SyncFileEngine {
-    file: File,
+    // 65536 is the largest buffer a linux guest will give us, empirically. Determined by
+    // having `MaybeBounce` logging scenarios where the fixed size bounce buffer isn't sufficient.
+    // Note that even if this assumption ever changes, the worse that'll happen is that we do
+    // multiple roundtrips between guest memory and the bounce buffer, as MaybeBounce would
+    // just chop larger reads/writes into chunks of 65k.
+    file: MaybeBounce<File, { u16::MAX as usize + 1 }>,
 }
 
 // SAFETY: `File` is send and ultimately a POD.
@@ -30,17 +35,27 @@ unsafe impl Send for SyncFileEngine {}
 
 impl SyncFileEngine {
     pub fn from_file(file: File) -> SyncFileEngine {
-        SyncFileEngine { file }
+        SyncFileEngine {
+            file: MaybeBounce::new_persistent(file, false),
+        }
     }
 
     #[cfg(test)]
     pub fn file(&self) -> &File {
-        &self.file
+        &self.file.target
+    }
+
+    pub fn start_bouncing(&mut self) {
+        self.file.activate()
+    }
+
+    pub fn is_bouncing(&self) -> bool {
+        self.file.is_activated()
     }
 
     /// Update the backing file of the engine
     pub fn update_file(&mut self, file: File) {
-        self.file = file
+        self.file.target = file
     }
 
     pub fn read(
@@ -77,8 +92,8 @@ impl SyncFileEngine {
 
     pub fn flush(&mut self) -> Result<(), SyncIoError> {
         // flush() first to force any cached data out of rust buffers.
-        self.file.flush().map_err(SyncIoError::Flush)?;
+        self.file.target.flush().map_err(SyncIoError::Flush)?;
         // Sync data out to physical media on host.
-        self.file.sync_all().map_err(SyncIoError::SyncAll)
+        self.file.target.sync_all().map_err(SyncIoError::SyncAll)
     }
 }
diff --git a/src/vmm/src/devices/virtio/block/virtio/persist.rs b/src/vmm/src/devices/virtio/block/virtio/persist.rs
index 380fe1de0e8..1abe137e424 100644
--- a/src/vmm/src/devices/virtio/block/virtio/persist.rs
+++ b/src/vmm/src/devices/virtio/block/virtio/persist.rs
@@ -12,7 +12,7 @@ use super::*;
 use crate::devices::virtio::block::persist::BlockConstructorArgs;
 use crate::devices::virtio::block::virtio::device::FileEngineType;
 use crate::devices::virtio::block::virtio::metrics::BlockMetricsPerDevice;
-use crate::devices::virtio::device::{ActiveState, DeviceState};
+use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice};
 use crate::devices::virtio::generated::virtio_blk::VIRTIO_BLK_F_RO;
 use crate::devices::virtio::generated::virtio_ids::VIRTIO_ID_BLOCK;
 use crate::devices::virtio::persist::VirtioDeviceState;
@@ -115,7 +115,7 @@ impl Persist<'_> for VirtioBlock {
             capacity: disk_properties.nsectors.to_le(),
         };
 
-        Ok(VirtioBlock {
+        let mut dev = VirtioBlock {
             avail_features,
             acked_features,
             config_space,
@@ -135,7 +135,13 @@ impl Persist<'_> for VirtioBlock {
             rate_limiter,
             is_io_engine_throttled: false,
             metrics: BlockMetricsPerDevice::alloc(state.id.clone()),
-        })
+        };
+
+        if state.virtio_state.bounce_in_userspace {
+            dev.force_userspace_bounce_buffers()
+        }
+
+        Ok(dev)
     }
 }
 
diff --git a/src/vmm/src/devices/virtio/device.rs b/src/vmm/src/devices/virtio/device.rs
index 8d98b3f0d11..f61ce8f007f 100644
--- a/src/vmm/src/devices/virtio/device.rs
+++ b/src/vmm/src/devices/virtio/device.rs
@@ -69,6 +69,12 @@ pub trait VirtioDevice: AsAny + Send {
     /// - self.avail_features() & self.acked_features() = self.get_acked_features()
     fn set_acked_features(&mut self, acked_features: u64);
 
+    /// Make the virtio device user userspace bounce buffers
+    fn force_userspace_bounce_buffers(&mut self);
+
+    /// Whether this device is using userspace bounce buffers
+    fn userspace_bounce_buffers(&self) -> bool;
+
     /// Check if virtio device has negotiated given feature.
     fn has_feature(&self, feature: u64) -> bool {
         (self.acked_features() & (1 << feature)) != 0
@@ -215,6 +221,14 @@ pub(crate) mod tests {
             todo!()
         }
 
+        fn force_userspace_bounce_buffers(&mut self) {
+            todo!()
+        }
+
+        fn userspace_bounce_buffers(&self) -> bool {
+            todo!()
+        }
+
         fn queues(&self) -> &[Queue] {
             todo!()
         }
diff --git a/src/vmm/src/devices/virtio/net/device.rs b/src/vmm/src/devices/virtio/net/device.rs
index d235c539c83..0a08f8318b3 100755
--- a/src/vmm/src/devices/virtio/net/device.rs
+++ b/src/vmm/src/devices/virtio/net/device.rs
@@ -6,6 +6,7 @@
 // found in the THIRD-PARTY file.
 
 use std::collections::VecDeque;
+use std::io::{Read, Write};
 use std::mem::{self};
 use std::net::Ipv4Addr;
 use std::num::Wrapping;
@@ -14,6 +15,7 @@ use std::sync::{Arc, Mutex};
 
 use libc::{EAGAIN, iovec};
 use log::{error, info};
+use vm_memory::VolatileSlice;
 use vmm_sys_util::eventfd::EventFd;
 
 use super::NET_QUEUE_MAX_SIZE;
@@ -250,7 +252,9 @@ pub struct Net {
     pub(crate) rx_rate_limiter: RateLimiter,
     pub(crate) tx_rate_limiter: RateLimiter,
 
-    rx_frame_buf: [u8; MAX_BUFFER_SIZE],
+    /// Used both for bounce buffering and for relaying frames to MMDS
+    userspace_buffer: [u8; MAX_BUFFER_SIZE],
+    pub(crate) userspace_bouncing: bool,
 
     tx_frame_headers: [u8; frame_hdr_len()],
 
@@ -314,8 +318,9 @@ impl Net {
             queue_evts,
             rx_rate_limiter,
             tx_rate_limiter,
-            rx_frame_buf: [0u8; MAX_BUFFER_SIZE],
+            userspace_buffer: [0u8; MAX_BUFFER_SIZE],
             tx_frame_headers: [0u8; frame_hdr_len()],
+            userspace_bouncing: false,
             config_space,
             guest_mac,
             device_state: DeviceState::Inactive,
@@ -501,6 +506,7 @@ impl Net {
     // Tries to detour the frame to MMDS and if MMDS doesn't accept it, sends it on the host TAP.
     //
     // Returns whether MMDS consumed the frame.
+    #[allow(clippy::too_many_arguments)]
     fn write_to_mmds_or_tap(
         mmds_ns: Option<&mut MmdsNetworkStack>,
         rate_limiter: &mut RateLimiter,
@@ -509,6 +515,7 @@ impl Net {
         tap: &mut Tap,
         guest_mac: Option<MacAddr>,
         net_metrics: &NetDeviceMetrics,
+        bb: Option<&mut [u8]>,
     ) -> Result<bool, NetError> {
         // Read the frame headers from the IoVecBuffer
         let max_header_len = headers.len();
@@ -556,7 +563,7 @@ impl Net {
         }
 
         let _metric = net_metrics.tap_write_agg.record_latency_metrics();
-        match Self::write_tap(tap, frame_iovec) {
+        match Self::write_tap(tap, frame_iovec, bb) {
             Ok(_) => {
                 let len = u64::from(frame_iovec.len());
                 net_metrics.tx_bytes_count.add(len);
@@ -590,15 +597,15 @@ impl Net {
 
         if let Some(ns) = self.mmds_ns.as_mut()
             && let Some(len) =
-                ns.write_next_frame(frame_bytes_from_buf_mut(&mut self.rx_frame_buf)?)
+                ns.write_next_frame(frame_bytes_from_buf_mut(&mut self.userspace_buffer)?)
         {
             let len = len.get();
             METRICS.mmds.tx_frames.inc();
             METRICS.mmds.tx_bytes.add(len as u64);
-            init_vnet_hdr(&mut self.rx_frame_buf);
+            init_vnet_hdr(&mut self.userspace_buffer);
             self.rx_buffer
                 .iovec
-                .write_all_volatile_at(&self.rx_frame_buf[..vnet_hdr_len() + len], 0)?;
+                .write_all_volatile_at(&self.userspace_buffer[..vnet_hdr_len() + len], 0)?;
             // SAFETY:
             // * len will never be bigger that u32::MAX because mmds is bound
             // by the size of `self.rx_frame_buf` which is MAX_BUFFER_SIZE size.
@@ -737,6 +744,8 @@ impl Net {
                 &mut self.tap,
                 self.guest_mac,
                 &self.metrics,
+                self.userspace_bouncing
+                    .then_some(self.userspace_buffer.as_mut_slice()),
             )
             .unwrap_or(false);
             if frame_consumed_by_mmds && self.rx_buffer.used_bytes == 0 {
@@ -827,11 +836,57 @@ impl Net {
         } else {
             self.rx_buffer.single_chain_slice_mut()
         };
-        self.tap.read_iovec(slice)
+
+        if self.userspace_bouncing {
+            let how_many = self
+                .tap
+                .tap_file
+                .read(self.userspace_buffer.as_mut_slice())?;
+
+            assert!(how_many <= MAX_BUFFER_SIZE);
+
+            let mut offset = 0;
+            for iov in slice {
+                assert!(
+                    offset <= how_many,
+                    "copied more bytes into guest memory than read from tap"
+                );
+
+                let to_copy = (how_many - offset).min(iov.iov_len);
+
+                if to_copy == 0 {
+                    break;
+                }
+
+                // SAFETY: the iovec comes from an `IoVecBufferMut`, which upholds the invariant
+                // that all contained iovecs are covering valid ranges of guest memory.
+                // Particularly, to_copy <= iov.iov_len
+                let vslice = unsafe { VolatileSlice::new(iov.iov_base.cast(), to_copy) };
+
+                vslice.copy_from(&self.userspace_buffer[offset..]);
+
+                offset += to_copy;
+            }
+
+            Ok(how_many)
+        } else {
+            self.tap.read_iovec(slice)
+        }
     }
 
-    fn write_tap(tap: &mut Tap, buf: &IoVecBuffer) -> std::io::Result<usize> {
-        tap.write_iovec(buf)
+    fn write_tap(
+        tap: &mut Tap,
+        buf: &IoVecBuffer,
+        bounce_buffer: Option<&mut [u8]>,
+    ) -> std::io::Result<usize> {
+        if let Some(bb) = bounce_buffer {
+            let how_many = buf.len() as usize;
+            let copied = buf.read_volatile_at(&mut &mut *bb, 0, how_many).unwrap();
+            assert_eq!(copied, how_many);
+            tap.tap_file.write(&bb[..copied])
+        } else {
+            tap.write_iovec(buf)
+        }
     }
 
     /// Process a single RX queue event.
@@ -975,6 +1030,14 @@ impl VirtioDevice for Net {
         self.acked_features = acked_features;
     }
 
+    fn force_userspace_bounce_buffers(&mut self) {
+        self.userspace_bouncing = true
+    }
+
+    fn userspace_bounce_buffers(&self) -> bool {
+        self.userspace_bouncing
+    }
+
     fn queues(&self) -> &[Queue] {
         &self.queues
     }
@@ -2026,6 +2089,7 @@ pub mod tests {
                     &mut net.tap,
                     Some(src_mac),
                     &net.metrics,
+                    None
                 )
                 .unwrap()
             )
@@ -2065,6 +2129,7 @@ pub mod tests {
                 &mut net.tap,
                 Some(guest_mac),
                 &net.metrics,
+                None
             )
         );
 
@@ -2080,6 +2145,7 @@ pub mod tests {
                 &mut net.tap,
                 Some(not_guest_mac),
                 &net.metrics,
+                None
             )
         );
     }
diff --git a/src/vmm/src/devices/virtio/net/persist.rs b/src/vmm/src/devices/virtio/net/persist.rs
index ba56cc39aac..e46c349ec08 100644
--- a/src/vmm/src/devices/virtio/net/persist.rs
+++ b/src/vmm/src/devices/virtio/net/persist.rs
@@ -127,6 +127,7 @@ impl Persist<'_> for Net {
         )?;
         net.avail_features = state.virtio_state.avail_features;
         net.acked_features = state.virtio_state.acked_features;
+        net.userspace_bouncing = state.virtio_state.bounce_in_userspace;
 
         Ok(net)
     }
diff --git a/src/vmm/src/devices/virtio/net/tap.rs b/src/vmm/src/devices/virtio/net/tap.rs
index 3cfdf1e7fdf..487010aafc1 100644
--- a/src/vmm/src/devices/virtio/net/tap.rs
+++ b/src/vmm/src/devices/virtio/net/tap.rs
@@ -49,7 +49,7 @@ ioctl_iow_nr!(TUNSETVNETHDRSZ, TUNTAP, 216, ::std::os::raw::c_int);
 /// Tap goes out of scope, and the kernel will clean up the interface automatically.
 #[derive(Debug)]
 pub struct Tap {
-    tap_file: File,
+    pub(crate) tap_file: File,
     pub(crate) if_name: [u8; IFACE_NAME_MAX_LEN],
 }
 
diff --git a/src/vmm/src/devices/virtio/persist.rs b/src/vmm/src/devices/virtio/persist.rs
index 85c4940f305..f36d12150c5 100644
--- a/src/vmm/src/devices/virtio/persist.rs
+++ b/src/vmm/src/devices/virtio/persist.rs
@@ -126,17 +126,20 @@ pub struct VirtioDeviceState {
     pub queues: Vec<QueueState>,
     /// Flag for activated status.
     pub activated: bool,
+    /// Whether this device has to use userspace bounce buffers
+    pub bounce_in_userspace: bool,
 }
 
 impl VirtioDeviceState {
     /// Construct the virtio state of a device.
-    pub fn from_device(device: &dyn VirtioDevice) -> Self {
+    pub fn from_device(device: &impl VirtioDevice) -> Self {
         VirtioDeviceState {
             device_type: device.device_type(),
             avail_features: device.avail_features(),
             acked_features: device.acked_features(),
             queues: device.queues().iter().map(Persist::save).collect(),
             activated: device.is_activated(),
+            bounce_in_userspace: device.userspace_bounce_buffers(),
         }
     }
 
diff --git a/src/vmm/src/devices/virtio/rng/device.rs b/src/vmm/src/devices/virtio/rng/device.rs
index 6f488fbe217..05ba7987c80 100644
--- a/src/vmm/src/devices/virtio/rng/device.rs
+++ b/src/vmm/src/devices/virtio/rng/device.rs
@@ -320,6 +320,14 @@ impl VirtioDevice for Entropy {
             self.process_virtio_queues();
         }
     }
+
+    fn force_userspace_bounce_buffers(&mut self) {
+        // rng device works with only userspace accesses
+    }
+
+    fn userspace_bounce_buffers(&self) -> bool {
+        false
+    }
 }
 
 #[cfg(test)]
diff --git a/src/vmm/src/devices/virtio/transport/mmio.rs b/src/vmm/src/devices/virtio/transport/mmio.rs
index c20928e3c29..2e45ab6956a 100644
--- a/src/vmm/src/devices/virtio/transport/mmio.rs
+++ b/src/vmm/src/devices/virtio/transport/mmio.rs
@@ -531,6 +531,14 @@ pub(crate) mod tests {
             self.acked_features = acked_features;
         }
 
+        fn force_userspace_bounce_buffers(&mut self) {
+            unimplemented!()
+        }
+
+        fn userspace_bounce_buffers(&self) -> bool {
+            false
+        }
+
         fn queues(&self) -> &[Queue] {
             &self.queues
         }
diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs
index 3d6e4aee6a8..ccbc2fb3b89 100644
--- a/src/vmm/src/devices/virtio/transport/pci/device.rs
+++ b/src/vmm/src/devices/virtio/transport/pci/device.rs
@@ -1042,6 +1042,7 @@ mod tests {
                 entropy.clone(),
                 &mut Cmdline::new(1024).unwrap(),
                 false,
+                false,
             )
             .unwrap();
         vmm
diff --git a/src/vmm/src/devices/virtio/vsock/csm/connection.rs b/src/vmm/src/devices/virtio/vsock/csm/connection.rs
index a5a2f4aec5b..b871450076a 100644
--- a/src/vmm/src/devices/virtio/vsock/csm/connection.rs
+++ b/src/vmm/src/devices/virtio/vsock/csm/connection.rs
@@ -95,6 +95,7 @@ use crate::devices::virtio::vsock::metrics::METRICS;
 use crate::devices::virtio::vsock::packet::{VsockPacketHeader, VsockPacketRx, VsockPacketTx};
 use crate::logger::IncMetric;
 use crate::utils::wrap_usize_to_u32;
+use crate::vstate::memory::MaybeBounce;
 
 /// Trait that vsock connection backends need to implement.
 ///
@@ -118,7 +119,7 @@ pub struct VsockConnection<S: VsockConnectionBackend> {
     /// The peer (guest) port.
     peer_port: u32,
     /// The (connected) host-side stream.
-    stream: S,
+    pub(crate) stream: MaybeBounce<S, { u16::MAX as usize }>,
     /// The TX buffer for this connection.
     tx_buf: TxBuf,
     /// Total number of bytes that have been successfully written to `self.stream`, either
@@ -414,7 +415,7 @@ where
     /// The connection is interested in being notified about EPOLLIN / EPOLLOUT events on the
     /// host stream.
     fn as_raw_fd(&self) -> RawFd {
-        self.stream.as_raw_fd()
+        self.stream.target.as_raw_fd()
     }
 }
 
@@ -509,13 +510,14 @@ where
         local_port: u32,
         peer_port: u32,
         peer_buf_alloc: u32,
+        bounce: bool,
     ) -> Self {
         Self {
             local_cid,
             peer_cid,
             local_port,
             peer_port,
-            stream,
+            stream: MaybeBounce::new_persistent(stream, bounce),
             state: ConnState::PeerInit,
             tx_buf: TxBuf::new(),
             fwd_cnt: Wrapping(0),
@@ -535,13 +537,14 @@ where
         peer_cid: u64,
         local_port: u32,
         peer_port: u32,
+        bounce: bool,
     ) -> Self {
         Self {
             local_cid,
             peer_cid,
             local_port,
             peer_port,
-            stream,
+            stream: MaybeBounce::new_persistent(stream, bounce),
             state: ConnState::LocalInit,
             tx_buf: TxBuf::new(),
             fwd_cnt: Wrapping(0),
@@ -882,9 +885,10 @@ mod tests {
                     LOCAL_PORT,
                     PEER_PORT,
                     PEER_BUF_ALLOC,
+                    false,
                 ),
                 ConnState::LocalInit => VsockConnection::<TestStream>::new_local_init(
-                    stream, LOCAL_CID, PEER_CID, LOCAL_PORT, PEER_PORT,
+                    stream, LOCAL_CID, PEER_CID, LOCAL_PORT, PEER_PORT, false,
                 ),
                 ConnState::Established => {
                     let mut conn = VsockConnection::<TestStream>::new_peer_init(
@@ -894,6 +898,7 @@ mod tests {
                         LOCAL_PORT,
                         PEER_PORT,
                         PEER_BUF_ALLOC,
+                        false,
                     );
                     assert!(conn.has_pending_rx());
                     conn.recv_pkt(&mut rx_pkt).unwrap();
@@ -912,7 +917,7 @@ mod tests {
         }
 
         fn set_stream(&mut self, stream: TestStream) {
-            self.conn.stream = stream;
+            self.conn.stream = MaybeBounce::new_persistent(stream, false);
         }
 
         fn set_peer_credit(&mut self, credit: u32) {
@@ -1014,7 +1019,7 @@ mod tests {
         let mut ctx = CsmTestContext::new_established();
         let data = &[1, 2, 3, 4];
         ctx.set_stream(TestStream::new_with_read_buf(data));
-        assert_eq!(ctx.conn.as_raw_fd(), ctx.conn.stream.as_raw_fd());
+        assert_eq!(ctx.conn.as_raw_fd(), ctx.conn.stream.target.as_raw_fd());
         ctx.notify_epollin();
         ctx.recv();
         assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RW);
@@ -1098,7 +1103,7 @@ mod tests {
 
             ctx.init_data_tx_pkt(data);
             ctx.send();
-            assert_eq!(ctx.conn.stream.write_buf.len(), 0);
+            assert_eq!(ctx.conn.stream.target.write_buf.len(), 0);
             assert!(ctx.conn.tx_buf.is_empty());
         }
 
@@ -1113,7 +1118,7 @@ mod tests {
             let data = &[1, 2, 3, 4];
             ctx.init_data_tx_pkt(data);
             ctx.send();
-            assert_eq!(ctx.conn.stream.write_buf, data.to_vec());
+            assert_eq!(ctx.conn.stream.target.write_buf, data.to_vec());
 
             ctx.notify_epollin();
             ctx.recv();
@@ -1233,7 +1238,7 @@ mod tests {
             ctx.set_stream(TestStream::new());
             ctx.conn.notify(EventSet::OUT);
             assert!(ctx.conn.tx_buf.is_empty());
-            assert_eq!(ctx.conn.stream.write_buf, data);
+            assert_eq!(ctx.conn.stream.target.write_buf, data);
         }
     }
 
diff --git a/src/vmm/src/devices/virtio/vsock/device.rs b/src/vmm/src/devices/virtio/vsock/device.rs
index 7fe10d158ad..465b6c5dfd3 100644
--- a/src/vmm/src/devices/virtio/vsock/device.rs
+++ b/src/vmm/src/devices/virtio/vsock/device.rs
@@ -298,6 +298,14 @@ where
         self.acked_features = acked_features
     }
 
+    fn force_userspace_bounce_buffers(&mut self) {
+        self.backend.start_bouncing()
+    }
+
+    fn userspace_bounce_buffers(&self) -> bool {
+        self.backend.is_bouncing()
+    }
+
     fn queues(&self) -> &[VirtQueue] {
         &self.queues
     }
diff --git a/src/vmm/src/devices/virtio/vsock/mod.rs b/src/vmm/src/devices/virtio/vsock/mod.rs
index cc9f7746580..4cb892083f9 100644
--- a/src/vmm/src/devices/virtio/vsock/mod.rs
+++ b/src/vmm/src/devices/virtio/vsock/mod.rs
@@ -179,4 +179,7 @@ pub trait VsockChannel {
 /// The vsock backend, which is basically an epoll-event-driven vsock channel.
 /// Currently, the only implementation we have is `crate::devices::virtio::unix::muxer::VsockMuxer`,
 /// which translates guest-side vsock connections to host-side Unix domain socket connections.
-pub trait VsockBackend: VsockChannel + VsockEpollListener + Send {}
+pub trait VsockBackend: VsockChannel + VsockEpollListener + Send {
+    fn start_bouncing(&mut self);
+    fn is_bouncing(&self) -> bool;
+}
diff --git a/src/vmm/src/devices/virtio/vsock/persist.rs b/src/vmm/src/devices/virtio/vsock/persist.rs
index acf330a3e71..de50e134270 100644
--- a/src/vmm/src/devices/virtio/vsock/persist.rs
+++ b/src/vmm/src/devices/virtio/vsock/persist.rs
@@ -9,7 +9,7 @@ use std::sync::Arc;
 use serde::{Deserialize, Serialize};
 
 use super::*;
-use crate::devices::virtio::device::{ActiveState, DeviceState};
+use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice};
 use crate::devices::virtio::generated::virtio_ids::{self, VIRTIO_ID_VSOCK};
 use crate::devices::virtio::persist::VirtioDeviceState;
 use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE;
@@ -122,6 +122,11 @@ where
         vsock.acked_features = state.virtio_state.acked_features;
         vsock.avail_features = state.virtio_state.avail_features;
         vsock.device_state = DeviceState::Inactive;
+
+        if state.virtio_state.bounce_in_userspace {
+            vsock.force_userspace_bounce_buffers();
+        }
+
         Ok(vsock)
     }
 }
diff --git a/src/vmm/src/devices/virtio/vsock/test_utils.rs b/src/vmm/src/devices/virtio/vsock/test_utils.rs
index 3d4ab704975..f7e12138de5 100644
--- a/src/vmm/src/devices/virtio/vsock/test_utils.rs
+++ b/src/vmm/src/devices/virtio/vsock/test_utils.rs
@@ -113,7 +113,15 @@ impl VsockEpollListener for TestBackend {
         self.evset = Some(evset);
     }
 }
-impl VsockBackend for TestBackend {}
+impl VsockBackend for TestBackend {
+    fn start_bouncing(&mut self) {
+        unimplemented!()
+    }
+
+    fn is_bouncing(&self) -> bool {
+        false
+    }
+}
 
 #[derive(Debug)]
 pub struct TestContext {
diff --git a/src/vmm/src/devices/virtio/vsock/unix/muxer.rs b/src/vmm/src/devices/virtio/vsock/unix/muxer.rs
index ad979b4bdeb..331f762d9d0 100644
--- a/src/vmm/src/devices/virtio/vsock/unix/muxer.rs
+++ b/src/vmm/src/devices/virtio/vsock/unix/muxer.rs
@@ -108,6 +108,7 @@ pub struct VsockMuxer {
     local_port_set: HashSet<u32>,
     /// The last used host-side port.
     local_port_last: u32,
+    bounce: bool,
 }
 
 impl VsockChannel for VsockMuxer {
@@ -299,7 +300,19 @@ impl VsockEpollListener for VsockMuxer {
     }
 }
 
-impl VsockBackend for VsockMuxer {}
+impl VsockBackend for VsockMuxer {
+    fn start_bouncing(&mut self) {
+        self.bounce = true;
+
+        for conn in self.conn_map.values_mut() {
+            conn.stream.activate()
+        }
+    }
+
+    fn is_bouncing(&self) -> bool {
+        self.bounce
+    }
+}
 
 impl VsockMuxer {
     /// Muxer constructor.
@@ -321,6 +334,7 @@ impl VsockMuxer {
             killq: MuxerKillQ::new(),
             local_port_last: (1u32 << 30) - 1,
             local_port_set: HashSet::with_capacity(defs::MAX_CONNECTIONS),
+            bounce: false,
         };
 
         // Listen on the host initiated socket, for incoming connections.
@@ -402,6 +416,7 @@ impl VsockMuxer {
                                     self.cid,
                                     local_port,
                                     peer_port,
+                                    self.bounce,
                                 ),
                             )
                         })
@@ -629,6 +644,7 @@ impl VsockMuxer {
                         pkt.hdr.dst_port(),
                         pkt.hdr.src_port(),
                         pkt.hdr.buf_alloc(),
+                        self.bounce,
                     ),
                 )
             })
diff --git a/src/vmm/src/initrd.rs b/src/vmm/src/initrd.rs
index 9dfcd8bc16e..624ec397f73 100644
--- a/src/vmm/src/initrd.rs
+++ b/src/vmm/src/initrd.rs
@@ -1,14 +1,9 @@
 // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-use std::fs::File;
-use std::os::unix::fs::MetadataExt;
-
 use vm_memory::{GuestAddress, GuestMemory, ReadVolatile, VolatileMemoryError};
 
 use crate::arch::initrd_load_addr;
-use crate::utils::u64_to_usize;
-use crate::vmm_config::boot_source::BootConfig;
 use crate::vstate::memory::GuestMemoryMmap;
 
 /// Errors associated with initrd loading.
@@ -20,8 +15,6 @@ pub enum InitrdError {
     Load,
     /// Cannot image metadata: {0}
     Metadata(std::io::Error),
-    /// Cannot copy initrd file fd: {0}
-    CloneFd(std::io::Error),
     /// Cannot load initrd due to an invalid image: {0}
     Read(VolatileMemoryError),
 }
@@ -36,31 +29,20 @@ pub struct InitrdConfig {
 }
 
 impl InitrdConfig {
-    /// Load initrd into guest memory based on the boot config.
-    pub fn from_config(
-        boot_cfg: &BootConfig,
-        vm_memory: &GuestMemoryMmap,
-    ) -> Result<Option<Self>, InitrdError> {
-        Ok(match &boot_cfg.initrd_file {
-            Some(f) => {
-                let f = f.try_clone().map_err(InitrdError::CloneFd)?;
-                Some(Self::from_file(vm_memory, f)?)
-            }
-            None => None,
-        })
-    }
-
     /// Loads the initrd from a file into guest memory.
-    pub fn from_file(vm_memory: &GuestMemoryMmap, mut file: File) -> Result<Self, InitrdError> {
-        let size = file.metadata().map_err(InitrdError::Metadata)?.size();
-        let size = u64_to_usize(size);
+    pub fn from_reader<R: ReadVolatile>(
+        vm_memory: &GuestMemoryMmap,
+        mut reader: R,
+        size: usize,
+    ) -> Result<Self, InitrdError> {
         let Some(address) = initrd_load_addr(vm_memory, size) else {
             return Err(InitrdError::Address);
         };
         let mut slice = vm_memory
             .get_slice(GuestAddress(address), size)
             .map_err(|_| InitrdError::Load)?;
-        file.read_exact_volatile(&mut slice)
+        reader
+            .read_exact_volatile(&mut slice)
             .map_err(InitrdError::Read)?;
 
         Ok(InitrdConfig {
@@ -105,7 +87,7 @@ mod tests {
 
         // Need to reset the cursor to read initrd properly.
         tempfile.seek(SeekFrom::Start(0)).unwrap();
-        let initrd = InitrdConfig::from_file(&gm, tempfile).unwrap();
+        let initrd = InitrdConfig::from_reader(&gm, tempfile, image.len()).unwrap();
         assert!(gm.address_in_range(initrd.address));
         assert_eq!(initrd.size, image.len());
     }
@@ -120,7 +102,7 @@ mod tests {
 
         // Need to reset the cursor to read initrd properly.
         tempfile.seek(SeekFrom::Start(0)).unwrap();
-        let res = InitrdConfig::from_file(&gm, tempfile);
+        let res = InitrdConfig::from_reader(&gm, tempfile, image.len());
         assert!(matches!(res, Err(InitrdError::Address)), "{:?}", res);
     }
 
@@ -134,7 +116,7 @@ mod tests {
 
         // Need to reset the cursor to read initrd properly.
         tempfile.seek(SeekFrom::Start(0)).unwrap();
-        let res = InitrdConfig::from_file(&gm, tempfile);
+        let res = InitrdConfig::from_reader(&gm, tempfile, image.len());
         assert!(matches!(res, Err(InitrdError::Address)), "{:?}", res);
     }
 }
diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs
index c3b2410dfe1..c5e811c2af9 100644
--- a/src/vmm/src/lib.rs
+++ b/src/vmm/src/lib.rs
@@ -115,8 +115,10 @@ pub mod vstate;
 pub mod initrd;
 
 use std::collections::HashMap;
-use std::io;
+use std::io::{self, Read, Write};
+use std::os::fd::RawFd;
 use std::os::unix::io::AsRawFd;
+use std::os::unix::net::UnixStream;
 use std::sync::mpsc::RecvTimeoutError;
 use std::sync::{Arc, Barrier, Mutex};
 use std::time::Duration;
@@ -127,6 +129,7 @@ use event_manager::{EventManager as BaseEventManager, EventOps, Events, MutEvent
 use seccomp::BpfProgram;
 use snapshot::Persist;
 use userfaultfd::Uffd;
+use vm_memory::GuestAddress;
 use vmm_sys_util::epoll::EventSet;
 use vmm_sys_util::eventfd::EventFd;
 use vmm_sys_util::terminal::Terminal;
@@ -138,12 +141,15 @@ use crate::devices::virtio::balloon::{BALLOON_DEV_ID, Balloon, BalloonConfig, Ba
 use crate::devices::virtio::block::device::Block;
 use crate::devices::virtio::net::Net;
 use crate::logger::{METRICS, MetricsError, error, info, warn};
-use crate::persist::{MicrovmState, MicrovmStateError, VmInfo};
+use crate::persist::{FaultReply, FaultRequest, MicrovmState, MicrovmStateError, VmInfo};
 use crate::rate_limiter::BucketUpdate;
 use crate::vmm_config::instance_info::{InstanceInfo, VmState};
-use crate::vstate::memory::{GuestMemory, GuestMemoryMmap, GuestMemoryRegion};
+use crate::vstate::memory::{
+    GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion,
+};
 use crate::vstate::vcpu::VcpuState;
 pub use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuEvent, VcpuHandle, VcpuResponse};
+use crate::vstate::vm::UserfaultData;
 pub use crate::vstate::vm::Vm;
 
 /// Shorthand type for the EventManager flavour used by Firecracker.
@@ -297,6 +303,8 @@ pub struct Vmm {
     // Save UFFD in order to keep it open in the Firecracker process, as well.
     #[allow(unused)]
     uffd: Option<Uffd>,
+    // Used for userfault communication with the UFFD handler when secret freedom is enabled
+    uffd_socket: Option<UnixStream>,
     vcpus_handles: Vec<VcpuHandle>,
     // Used by Vcpus and devices to initiate teardown; Vmm should never write here.
     vcpus_exit_evt: EventFd,
@@ -630,6 +638,98 @@ impl Vmm {
         self.shutdown_exit_code = Some(exit_code);
     }
 
+    fn process_vcpu_userfault(&mut self, vcpu: u32, userfault_data: UserfaultData) {
+        let offset = self
+            .vm
+            .guest_memory()
+            .gpa_to_offset(GuestAddress(userfault_data.gpa))
+            .expect("Failed to convert GPA to offset");
+
+        let fault_request = FaultRequest {
+            vcpu,
+            offset,
+            flags: userfault_data.flags,
+            token: None,
+        };
+        let fault_request_json =
+            serde_json::to_string(&fault_request).expect("Failed to serialize fault request");
+
+        self.uffd_socket
+            .as_ref()
+            .expect("Uffd socket is not set")
+            .write_all(fault_request_json.as_bytes())
+            .expect("Failed to write to uffd socket");
+    }
+
+    fn active_event_in_uffd_socket(&self, source: RawFd, event_set: EventSet) -> bool {
+        if let Some(uffd_socket) = &self.uffd_socket {
+            uffd_socket.as_raw_fd() == source && event_set == EventSet::IN
+        } else {
+            false
+        }
+    }
+
+    fn process_uffd_socket(&mut self) {
+        const BUFFER_SIZE: usize = 4096;
+
+        let stream = self.uffd_socket.as_mut().expect("Uffd socket is not set");
+
+        let mut buffer = [0u8; BUFFER_SIZE];
+        let mut current_pos = 0;
+
+        loop {
+            if current_pos < BUFFER_SIZE {
+                match stream.read(&mut buffer[current_pos..]) {
+                    Ok(0) => break,
+                    Ok(n) => current_pos += n,
+                    Err(e) if e.kind() == io::ErrorKind::WouldBlock => {
+                        if current_pos == 0 {
+                            break;
+                        }
+                    }
+                    Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
+                    Err(e) => panic!("Read error: {}", e),
+                }
+            }
+
+            let mut parser = serde_json::Deserializer::from_slice(&buffer[..current_pos])
+                .into_iter::<FaultReply>();
+            let mut total_consumed = 0;
+            let mut needs_more = false;
+
+            while let Some(result) = parser.next() {
+                match result {
+                    Ok(fault_reply) => {
+                        let vcpu = fault_reply.vcpu.expect("vCPU must be set");
+                        self.vcpus_handles[vcpu as usize].send_userfault_resolved();
+
+                        total_consumed = parser.byte_offset();
+                    }
+                    Err(e) if e.is_eof() => {
+                        needs_more = true;
+                        break;
+                    }
+                    Err(e) => {
+                        println!(
+                            "Buffer content: {:?}",
+                            std::str::from_utf8(&buffer[..current_pos])
+                        );
+                        panic!("Invalid JSON: {}", e);
+                    }
+                }
+            }
+
+            if total_consumed > 0 {
+                buffer.copy_within(total_consumed..current_pos, 0);
+                current_pos -= total_consumed;
+            }
+
+            if needs_more {
+                continue;
+            }
+        }
+    }
+
     /// Gets a reference to kvm-ioctls Vm
     #[cfg(feature = "gdb")]
     pub fn vm(&self) -> &Vm {
@@ -707,32 +807,43 @@ impl MutEventSubscriber for Vmm {
         let event_set = event.event_set();
 
         if source == self.vcpus_exit_evt.as_raw_fd() && event_set == EventSet::IN {
-            // Exit event handling should never do anything more than call 'self.stop()'.
             let _ = self.vcpus_exit_evt.read();
 
-            let exit_code = 'exit_code: {
-                // Query each vcpu for their exit_code.
-                for handle in &self.vcpus_handles {
-                    // Drain all vcpu responses that are pending from this vcpu until we find an
-                    // exit status.
-                    for response in handle.response_receiver().try_iter() {
-                        if let VcpuResponse::Exited(status) = response {
-                            // It could be that some vcpus exited successfully while others
-                            // errored out. Thus make sure that error exits from one vcpu always
-                            // takes precedence over "ok" exits
+            let mut pending_userfaults = Vec::with_capacity(self.vcpus_handles.len());
+            let mut should_exit = false;
+            let mut final_exit_code = FcExitCode::Ok;
+
+            // First pass: collect all responses and determine exit status
+            for (handle, index) in self.vcpus_handles.iter().zip(0u32..) {
+                for response in handle.response_receiver().try_iter() {
+                    match response {
+                        VcpuResponse::Exited(status) => {
+                            should_exit = true;
                             if status != FcExitCode::Ok {
-                                break 'exit_code status;
+                                final_exit_code = status;
                             }
                         }
+                        VcpuResponse::Userfault(userfault_data) => {
+                            pending_userfaults.push((index, userfault_data));
+                        }
+                        _ => panic!("Unexpected response from vcpu: {:?}", response),
                     }
                 }
+            }
 
-                // No CPUs exited with error status code, report "Ok"
-                FcExitCode::Ok
-            };
-            self.stop(exit_code);
-        } else {
-            error!("Spurious EventManager event for handler: Vmm");
+            // Process any pending userfaults
+            for (index, userfault_data) in pending_userfaults {
+                self.process_vcpu_userfault(index, userfault_data);
+            }
+
+            // Stop if we received an exit event
+            if should_exit {
+                self.stop(final_exit_code);
+            }
+        }
+
+        if self.active_event_in_uffd_socket(source, event_set) {
+            self.process_uffd_socket();
         }
     }
 
@@ -740,5 +851,11 @@ impl MutEventSubscriber for Vmm {
         if let Err(err) = ops.add(Events::new(&self.vcpus_exit_evt, EventSet::IN)) {
             error!("Failed to register vmm exit event: {}", err);
         }
+
+        if let Some(uffd_socket) = self.uffd_socket.as_ref()
+            && let Err(err) = ops.add(Events::new(uffd_socket, EventSet::IN))
+        {
+            error!("Failed to register UFFD socket: {}", err);
+        }
     }
 }
diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs
index 212b6105831..3f9817b50fd 100644
--- a/src/vmm/src/persist.rs
+++ b/src/vmm/src/persist.rs
@@ -6,7 +6,7 @@
 use std::fmt::Debug;
 use std::fs::{File, OpenOptions};
 use std::io::{self, Write};
-use std::mem::forget;
+use std::os::fd::RawFd;
 use std::os::unix::io::AsRawFd;
 use std::os::unix::net::UnixStream;
 use std::path::Path;
@@ -14,7 +14,7 @@ use std::sync::{Arc, Mutex};
 
 use semver::Version;
 use serde::{Deserialize, Serialize};
-use userfaultfd::{FeatureFlags, Uffd, UffdBuilder};
+use userfaultfd::{FeatureFlags, RegisterMode, Uffd, UffdBuilder};
 use vmm_sys_util::sock_ctrl_msg::ScmSocket;
 
 #[cfg(target_arch = "aarch64")]
@@ -47,6 +47,8 @@ use crate::{EventManager, Vmm, vstate};
 pub struct VmInfo {
     /// Guest memory size.
     pub mem_size_mib: u64,
+    /// Memory config
+    pub secret_free: bool,
     /// smt information
     pub smt: bool,
     /// CPU template type
@@ -61,6 +63,7 @@ impl From<&VmResources> for VmInfo {
     fn from(value: &VmResources) -> Self {
         Self {
             mem_size_mib: value.machine_config.mem_size_mib as u64,
+            secret_free: value.machine_config.secret_free,
             smt: value.machine_config.smt,
             cpu_template: StaticCpuTemplate::from(&value.machine_config.cpu_template),
             boot_source: value.boot_source.config.clone(),
@@ -110,6 +113,54 @@ pub struct GuestRegionUffdMapping {
     pub page_size_kib: usize,
 }
 
+/// FaultRequest
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
+pub struct FaultRequest {
+    /// vCPU that encountered the fault
+    pub vcpu: u32,
+    /// Offset in guest_memfd where the fault occured
+    pub offset: u64,
+    /// Flags
+    pub flags: u64,
+    /// Async PF token
+    pub token: Option<u32>,
+}
+
+/// FaultReply
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
+pub struct FaultReply {
+    /// vCPU that encountered the fault, from `FaultRequest` (if present, otherwise 0)
+    pub vcpu: Option<u32>,
+    /// Offset in guest_memfd where population started
+    pub offset: u64,
+    /// Length of populated area
+    pub len: u64,
+    /// Flags, must be copied from `FaultRequest`, otherwise 0
+    pub flags: u64,
+    /// Async PF token, must be copied from `FaultRequest`, otherwise None
+    pub token: Option<u32>,
+    /// Whether the populated pages are zero pages
+    pub zero: bool,
+}
+
+/// UffdMsgFromFirecracker
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(untagged)]
+pub enum UffdMsgFromFirecracker {
+    /// Mappings
+    Mappings(Vec<GuestRegionUffdMapping>),
+    /// FaultReq
+    FaultReq(FaultRequest),
+}
+
+/// UffdMsgToFirecracker
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(untagged)]
+pub enum UffdMsgToFirecracker {
+    /// FaultRep
+    FaultRep(FaultReply),
+}
+
 /// Errors related to saving and restoring Microvm state.
 #[derive(Debug, thiserror::Error, displaydoc::Display)]
 pub enum MicrovmStateError {
@@ -320,6 +371,17 @@ pub fn restore_from_snapshot(
     vm_resources: &mut VmResources,
 ) -> Result<Arc<Mutex<Vmm>>, RestoreFromSnapshotError> {
     let mut microvm_state = snapshot_state_from_file(&params.snapshot_path)?;
+
+    if microvm_state.vm_info.secret_free && params.mem_backend.backend_type == MemBackendType::File
+    {
+        return Err(RestoreFromSnapshotError::Build(
+            BuildMicrovmFromSnapshotError::VmUpdateConfig(MachineConfigError::Incompatible(
+                "secret freedom",
+                "file memory backend",
+            )),
+        ));
+    }
+
     for entry in &params.network_overrides {
         microvm_state
             .device_states
@@ -352,6 +414,7 @@ pub fn restore_from_snapshot(
         .update_machine_config(&MachineConfigUpdate {
             vcpu_count: Some(vcpu_count),
             mem_size_mib: Some(u64_to_usize(microvm_state.vm_info.mem_size_mib)),
+            secret_free: Some(microvm_state.vm_info.secret_free),
             smt: Some(microvm_state.vm_info.smt),
             cpu_template: Some(microvm_state.vm_info.cpu_template),
             track_dirty_pages: Some(track_dirty_pages),
@@ -364,38 +427,12 @@ pub fn restore_from_snapshot(
     // Some sanity checks before building the microvm.
     snapshot_state_sanity_check(&microvm_state)?;
 
-    let mem_backend_path = &params.mem_backend.backend_path;
-    let mem_state = &microvm_state.vm_state.memory;
-
-    let (guest_memory, uffd) = match params.mem_backend.backend_type {
-        MemBackendType::File => {
-            if vm_resources.machine_config.huge_pages.is_hugetlbfs() {
-                return Err(RestoreFromSnapshotGuestMemoryError::File(
-                    GuestMemoryFromFileError::HugetlbfsSnapshot,
-                )
-                .into());
-            }
-            (
-                guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages)
-                    .map_err(RestoreFromSnapshotGuestMemoryError::File)?,
-                None,
-            )
-        }
-        MemBackendType::Uffd => guest_memory_from_uffd(
-            mem_backend_path,
-            mem_state,
-            track_dirty_pages,
-            vm_resources.machine_config.huge_pages,
-        )
-        .map_err(RestoreFromSnapshotGuestMemoryError::Uffd)?,
-    };
     builder::build_microvm_from_snapshot(
         instance_info,
         event_manager,
         microvm_state,
-        guest_memory,
-        uffd,
         seccomp_filters,
+        params,
         vm_resources,
     )
     .map_err(RestoreFromSnapshotError::Build)
@@ -432,13 +469,14 @@ pub enum GuestMemoryFromFileError {
     HugetlbfsSnapshot,
 }
 
-fn guest_memory_from_file(
+/// Creates guest memory from a file.
+pub fn guest_memory_from_file(
     mem_file_path: &Path,
     mem_state: &GuestMemoryState,
     track_dirty_pages: bool,
 ) -> Result<Vec<GuestRegionMmap>, GuestMemoryFromFileError> {
     let mem_file = File::open(mem_file_path)?;
-    let guest_mem = memory::snapshot_file(mem_file, mem_state.regions(), track_dirty_pages)?;
+    let guest_mem = memory::file_private(mem_file, mem_state.regions(), track_dirty_pages)?;
     Ok(guest_mem)
 }
 
@@ -455,16 +493,25 @@ pub enum GuestMemoryFromUffdError {
     Connect(#[from] std::io::Error),
     /// Failed to sends file descriptor: {0}
     Send(#[from] vmm_sys_util::errno::Error),
+    /// Cannot restore hugetlbfs backed snapshot when using Secret Freedom.
+    HugetlbfsSnapshot,
 }
 
-fn guest_memory_from_uffd(
+type GuestMemoryResult =
+    Result<(Vec<GuestRegionMmap>, Option<Uffd>, Option<UnixStream>), GuestMemoryFromUffdError>;
+
+/// Creates guest memory using a UDS socket provided by a UFFD handler.
+pub fn guest_memory_from_uffd(
     mem_uds_path: &Path,
     mem_state: &GuestMemoryState,
     track_dirty_pages: bool,
     huge_pages: HugePageConfig,
-) -> Result<(Vec<GuestRegionMmap>, Option<Uffd>), GuestMemoryFromUffdError> {
+    guest_memfd: Option<File>,
+    userfault_bitmap_memfd: Option<&File>,
+) -> GuestMemoryResult {
+    let guest_memfd_fd = guest_memfd.as_ref().map(|f| f.as_raw_fd());
     let (guest_memory, backend_mappings) =
-        create_guest_memory(mem_state, track_dirty_pages, huge_pages)?;
+        create_guest_memory(mem_state, track_dirty_pages, huge_pages, guest_memfd)?;
 
     let mut uffd_builder = UffdBuilder::new();
 
@@ -481,22 +528,42 @@ fn guest_memory_from_uffd(
         .create()
         .map_err(GuestMemoryFromUffdError::Create)?;
 
+    let mut mode = RegisterMode::MISSING;
+    let mut fds = vec![uffd.as_raw_fd()];
+
+    if let Some(gmem) = guest_memfd_fd {
+        mode = RegisterMode::MINOR;
+        fds.push(gmem);
+        fds.push(
+            userfault_bitmap_memfd
+                .expect("memfd is not present")
+                .as_raw_fd(),
+        );
+    }
+
     for mem_region in guest_memory.iter() {
-        uffd.register(mem_region.as_ptr().cast(), mem_region.size() as _)
+        uffd.register_with_mode(mem_region.as_ptr().cast(), mem_region.size() as _, mode)
             .map_err(GuestMemoryFromUffdError::Register)?;
     }
 
-    send_uffd_handshake(mem_uds_path, &backend_mappings, &uffd)?;
+    let socket = send_uffd_handshake(mem_uds_path, &backend_mappings, fds)?;
 
-    Ok((guest_memory, Some(uffd)))
+    Ok((guest_memory, Some(uffd), Some(socket)))
 }
 
 fn create_guest_memory(
     mem_state: &GuestMemoryState,
     track_dirty_pages: bool,
     huge_pages: HugePageConfig,
+    guest_memfd: Option<File>,
 ) -> Result<(Vec<GuestRegionMmap>, Vec<GuestRegionUffdMapping>), GuestMemoryFromUffdError> {
-    let guest_memory = memory::anonymous(mem_state.regions(), track_dirty_pages, huge_pages)?;
+    let guest_memory = match guest_memfd {
+        Some(file) => {
+            memory::file_shared(file, mem_state.regions(), track_dirty_pages, huge_pages)?
+        }
+        None => memory::anonymous(mem_state.regions(), track_dirty_pages, huge_pages)?,
+    };
+
     let mut backend_mappings = Vec::with_capacity(guest_memory.len());
     let mut offset = 0;
     for mem_region in guest_memory.iter() {
@@ -517,15 +584,17 @@ fn create_guest_memory(
 fn send_uffd_handshake(
     mem_uds_path: &Path,
     backend_mappings: &[GuestRegionUffdMapping],
-    uffd: &impl AsRawFd,
-) -> Result<(), GuestMemoryFromUffdError> {
+    fds: Vec<RawFd>,
+) -> Result<UnixStream, GuestMemoryFromUffdError> {
     // This is safe to unwrap() because we control the contents of the vector
     // (i.e GuestRegionUffdMapping entries).
     let backend_mappings = serde_json::to_string(backend_mappings).unwrap();
 
     let socket = UnixStream::connect(mem_uds_path)?;
-    socket.send_with_fd(
-        backend_mappings.as_bytes(),
+    socket.set_nonblocking(true)?;
+
+    socket.send_with_fds(
+        &[backend_mappings.as_bytes()],
         // In the happy case we can close the fd since the other process has it open and is
         // using it to serve us pages.
         //
@@ -556,15 +625,10 @@ fn send_uffd_handshake(
         // Moreover, Firecracker holds a copy of the UFFD fd as well, so that even if the
         // page fault handler process does not tear down Firecracker when necessary, the
         // uffd will still be alive but with no one to serve faults, leading to guest freeze.
-        uffd.as_raw_fd(),
+        &fds,
     )?;
 
-    // We prevent Rust from closing the socket file descriptor to avoid a potential race condition
-    // between the mappings message and the connection shutdown. If the latter arrives at the UFFD
-    // handler first, the handler never sees the mappings.
-    forget(socket);
-
-    Ok(())
+    Ok(socket)
 }
 
 #[cfg(test)]
@@ -697,7 +761,7 @@ mod tests {
         };
 
         let (_, uffd_regions) =
-            create_guest_memory(&mem_state, false, HugePageConfig::None).unwrap();
+            create_guest_memory(&mem_state, false, HugePageConfig::None, None).unwrap();
 
         assert_eq!(uffd_regions.len(), 1);
         assert_eq!(uffd_regions[0].size, 0x20000);
@@ -731,7 +795,7 @@ mod tests {
 
         let listener = UnixListener::bind(uds_path).expect("Cannot bind to socket path");
 
-        send_uffd_handshake(uds_path, &uffd_regions, &std::io::stdin()).unwrap();
+        send_uffd_handshake(uds_path, &uffd_regions, vec![std::io::stdin().as_raw_fd()]).unwrap();
 
         let (stream, _) = listener.accept().expect("Cannot listen on UDS socket");
 
diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs
index 0d2f4bbed22..819dbd3d359 100644
--- a/src/vmm/src/resources.rs
+++ b/src/vmm/src/resources.rs
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 use std::convert::From;
+use std::fs::File;
 use std::path::PathBuf;
 use std::sync::{Arc, Mutex, MutexGuard};
 
@@ -9,6 +10,7 @@ use serde::{Deserialize, Serialize};
 
 use crate::cpu_config::templates::CustomCpuTemplate;
 use crate::device_manager::persist::SharedDeviceType;
+use crate::devices::virtio::block::device::Block;
 use crate::logger::info;
 use crate::mmds;
 use crate::mmds::data_store::{Mmds, MmdsVersion};
@@ -31,7 +33,7 @@ use crate::vmm_config::net::*;
 use crate::vmm_config::serial::SerialConfig;
 use crate::vmm_config::vsock::*;
 use crate::vstate::memory;
-use crate::vstate::memory::{GuestRegionMmap, MemoryError};
+use crate::vstate::memory::{GuestRegionMmap, MemoryError, create_memfd};
 
 /// Errors encountered when configuring microVM resources.
 #[derive(Debug, thiserror::Error, displaydoc::Display)]
@@ -237,7 +239,14 @@ impl VmResources {
                 self.balloon.set_device(balloon);
 
                 if self.machine_config.huge_pages != HugePageConfig::None {
-                    return Err(ResourcesError::BalloonDevice(BalloonConfigError::HugePages));
+                    return Err(ResourcesError::BalloonDevice(
+                        BalloonConfigError::IncompatibleWith("huge pages"),
+                    ));
+                }
+                if self.machine_config.secret_free {
+                    return Err(ResourcesError::BalloonDevice(
+                        BalloonConfigError::IncompatibleWith("secret freedom"),
+                    ));
                 }
             }
 
@@ -279,7 +288,31 @@ impl VmResources {
         }
 
         if self.balloon.get().is_some() && updated.huge_pages != HugePageConfig::None {
-            return Err(MachineConfigError::BalloonAndHugePages);
+            return Err(MachineConfigError::Incompatible(
+                "balloon device",
+                "huge pages",
+            ));
+        }
+        if self.balloon.get().is_some() && updated.secret_free {
+            return Err(MachineConfigError::Incompatible(
+                "balloon device",
+                "secret freedom",
+            ));
+        }
+        if updated.secret_free {
+            if self.vhost_user_devices_used() {
+                return Err(MachineConfigError::Incompatible(
+                    "vhost-user devices",
+                    "userspace bounce buffers",
+                ));
+            }
+
+            if self.async_block_engine_used() {
+                return Err(MachineConfigError::Incompatible(
+                    "async block engine",
+                    "userspace bounce buffers",
+                ));
+            }
         }
         self.machine_config = updated;
 
@@ -338,7 +371,11 @@ impl VmResources {
         }
 
         if self.machine_config.huge_pages != HugePageConfig::None {
-            return Err(BalloonConfigError::HugePages);
+            return Err(BalloonConfigError::IncompatibleWith("huge pages"));
+        }
+
+        if self.machine_config.secret_free {
+            return Err(BalloonConfigError::IncompatibleWith("secret freedom"));
         }
 
         self.balloon.set(config)
@@ -364,6 +401,17 @@ impl VmResources {
         &mut self,
         block_device_config: BlockDeviceConfig,
     ) -> Result<(), DriveError> {
+        if self.machine_config.secret_free {
+            if block_device_config.file_engine_type == Some(FileEngineType::Async) {
+                return Err(DriveError::IncompatibleWithSecretFreedom(
+                    "async file engine",
+                ));
+            }
+
+            if block_device_config.socket.is_some() {
+                return Err(DriveError::IncompatibleWithSecretFreedom("vhost-user-blk"));
+            }
+        }
         self.block.insert(block_device_config)
     }
 
@@ -463,18 +511,37 @@ impl VmResources {
         Ok(())
     }
 
+    /// Returns true if any vhost user devices are configured int his [`VmResources`] object
+    pub fn vhost_user_devices_used(&self) -> bool {
+        self.block
+            .devices
+            .iter()
+            .any(|b| b.lock().expect("Poisoned lock").is_vhost_user())
+    }
+
+    fn async_block_engine_used(&self) -> bool {
+        self.block
+            .devices
+            .iter()
+            .any(|b| match &*b.lock().unwrap() {
+                Block::Virtio(b) => b.file_engine_type() == FileEngineType::Async,
+                Block::VhostUser(_) => false,
+            })
+    }
+
+    /// Gets the size of the guest memory, in bytes
+    pub fn memory_size(&self) -> usize {
+        mib_to_bytes(self.machine_config.mem_size_mib)
+    }
+
     /// Allocates guest memory in a configuration most appropriate for these [`VmResources`].
     ///
     /// If vhost-user-blk devices are in use, allocates memfd-backed shared memory, otherwise
     /// prefers anonymous memory for performance reasons.
-    pub fn allocate_guest_memory(&self) -> Result<Vec<GuestRegionMmap>, MemoryError> {
-        let vhost_user_device_used = self
-            .block
-            .devices
-            .iter()
-            .any(|b| b.lock().expect("Poisoned lock").is_vhost_user());
-
-        // Page faults are more expensive for shared memory mapping, including  memfd.
+    pub fn allocate_guest_memory(
+        &self,
+        guest_memfd: Option<File>,
+    ) -> Result<Vec<GuestRegionMmap>, MemoryError> {
         // For this reason, we only back guest memory with a memfd
         // if a vhost-user-blk device is configured in the VM, otherwise we fall back to
         // an anonymous private memory.
@@ -483,20 +550,35 @@ impl VmResources {
         // because that would require running a backend process. If in the future we converge to
         // a single way of backing guest memory for vhost-user and non-vhost-user cases,
         // that would not be worth the effort.
-        let regions =
-            crate::arch::arch_memory_regions(mib_to_bytes(self.machine_config.mem_size_mib));
-        if vhost_user_device_used {
-            memory::memfd_backed(
-                regions.as_ref(),
+        let regions = crate::arch::arch_memory_regions(self.memory_size()).into_iter();
+        match guest_memfd {
+            Some(file) => memory::file_shared(
+                file,
+                regions,
                 self.machine_config.track_dirty_pages,
                 self.machine_config.huge_pages,
-            )
-        } else {
-            memory::anonymous(
-                regions.into_iter(),
-                self.machine_config.track_dirty_pages,
-                self.machine_config.huge_pages,
-            )
+            ),
+            None => {
+                if self.vhost_user_devices_used() {
+                    let memfd = create_memfd(
+                        self.memory_size() as u64,
+                        self.machine_config.huge_pages.into(),
+                    )?
+                    .into_file();
+                    memory::file_shared(
+                        memfd,
+                        regions,
+                        self.machine_config.track_dirty_pages,
+                        self.machine_config.huge_pages,
+                    )
+                } else {
+                    memory::anonymous(
+                        regions.into_iter(),
+                        self.machine_config.track_dirty_pages,
+                        self.machine_config.huge_pages,
+                    )
+                }
+            }
         }
     }
 }
@@ -1370,6 +1452,7 @@ mod tests {
         let mut aux_vm_config = MachineConfigUpdate {
             vcpu_count: Some(32),
             mem_size_mib: Some(512),
+            secret_free: Some(false),
             smt: Some(false),
             #[cfg(target_arch = "x86_64")]
             cpu_template: Some(StaticCpuTemplate::T2),
@@ -1391,44 +1474,6 @@ mod tests {
             aux_vm_config
         );
 
-        // Invalid vcpu count.
-        aux_vm_config.vcpu_count = Some(0);
-        assert_eq!(
-            vm_resources.update_machine_config(&aux_vm_config),
-            Err(MachineConfigError::InvalidVcpuCount)
-        );
-        aux_vm_config.vcpu_count = Some(33);
-        assert_eq!(
-            vm_resources.update_machine_config(&aux_vm_config),
-            Err(MachineConfigError::InvalidVcpuCount)
-        );
-
-        // Check that SMT is not supported on aarch64, and that on x86_64 enabling it requires vcpu
-        // count to be even.
-        aux_vm_config.smt = Some(true);
-        #[cfg(target_arch = "aarch64")]
-        assert_eq!(
-            vm_resources.update_machine_config(&aux_vm_config),
-            Err(MachineConfigError::SmtNotSupported)
-        );
-        aux_vm_config.vcpu_count = Some(3);
-        #[cfg(target_arch = "x86_64")]
-        assert_eq!(
-            vm_resources.update_machine_config(&aux_vm_config),
-            Err(MachineConfigError::InvalidVcpuCount)
-        );
-        aux_vm_config.vcpu_count = Some(32);
-        #[cfg(target_arch = "x86_64")]
-        vm_resources.update_machine_config(&aux_vm_config).unwrap();
-        aux_vm_config.smt = Some(false);
-
-        // Invalid mem_size_mib.
-        aux_vm_config.mem_size_mib = Some(0);
-        assert_eq!(
-            vm_resources.update_machine_config(&aux_vm_config),
-            Err(MachineConfigError::InvalidMemorySize)
-        );
-
         // Incompatible mem_size_mib with balloon size.
         vm_resources.machine_config.mem_size_mib = 128;
         vm_resources
@@ -1447,23 +1492,6 @@ mod tests {
         // mem_size_mib compatible with balloon size.
         aux_vm_config.mem_size_mib = Some(256);
         vm_resources.update_machine_config(&aux_vm_config).unwrap();
-
-        // mem_size_mib incompatible with huge pages configuration
-        aux_vm_config.mem_size_mib = Some(129);
-        aux_vm_config.huge_pages = Some(HugePageConfig::Hugetlbfs2M);
-        assert_eq!(
-            vm_resources
-                .update_machine_config(&aux_vm_config)
-                .unwrap_err(),
-            MachineConfigError::InvalidMemorySize
-        );
-
-        // mem_size_mib compatible with huge page configuration
-        aux_vm_config.mem_size_mib = Some(2048);
-        // Remove the balloon device config that's added by `default_vm_resources` as it would
-        // trigger the "ballooning incompatible with huge pages" check.
-        vm_resources.balloon = BalloonBuilder::new();
-        vm_resources.update_machine_config(&aux_vm_config).unwrap();
     }
 
     #[test]
@@ -1517,7 +1545,7 @@ mod tests {
         assert!(
             matches!(
                 err,
-                ResourcesError::BalloonDevice(BalloonConfigError::HugePages)
+                ResourcesError::BalloonDevice(BalloonConfigError::IncompatibleWith("huge pages"))
             ),
             "{:?}",
             err
diff --git a/src/vmm/src/vmm_config/balloon.rs b/src/vmm/src/vmm_config/balloon.rs
index 83d419c49db..87ddc7fb132 100644
--- a/src/vmm/src/vmm_config/balloon.rs
+++ b/src/vmm/src/vmm_config/balloon.rs
@@ -20,8 +20,8 @@ pub enum BalloonConfigError {
     TooManyPagesRequested,
     /// Error creating the balloon device: {0}
     CreateFailure(crate::devices::virtio::balloon::BalloonError),
-    /// Firecracker's huge pages support is incompatible with memory ballooning.
-    HugePages,
+    /// Memory ballooning is incompatible with {0}.
+    IncompatibleWith(&'static str),
 }
 
 /// This struct represents the strongly typed equivalent of the json body
diff --git a/src/vmm/src/vmm_config/drive.rs b/src/vmm/src/vmm_config/drive.rs
index 9e301eff751..88a9b813874 100644
--- a/src/vmm/src/vmm_config/drive.rs
+++ b/src/vmm/src/vmm_config/drive.rs
@@ -24,6 +24,8 @@ pub enum DriveError {
     DeviceUpdate(VmmError),
     /// A root block device already exists!
     RootBlockDeviceAlreadyAdded,
+    /// {0} is incompatible with secret freedom.
+    IncompatibleWithSecretFreedom(&'static str),
 }
 
 /// Use this structure to set up the Block Device before booting the kernel.
diff --git a/src/vmm/src/vmm_config/machine_config.rs b/src/vmm/src/vmm_config/machine_config.rs
index cfe7105fdf8..3d30860144e 100644
--- a/src/vmm/src/vmm_config/machine_config.rs
+++ b/src/vmm/src/vmm_config/machine_config.rs
@@ -27,10 +27,8 @@ pub enum MachineConfigError {
     /// Enabling simultaneous multithreading is not supported on aarch64.
     #[cfg(target_arch = "aarch64")]
     SmtNotSupported,
-    /// Could not determine host kernel version when checking hugetlbfs compatibility
-    KernelVersion,
-    /// Firecracker's huge pages support is incompatible with memory ballooning.
-    BalloonAndHugePages,
+    /// '{0}' and '{1}' are mutually exclusive and cannot be used together.
+    Incompatible(&'static str, &'static str)
 }
 
 /// Describes the possible (huge)page configurations for a microVM's memory.
@@ -97,6 +95,11 @@ pub struct MachineConfig {
     pub vcpu_count: u8,
     /// The memory size in MiB.
     pub mem_size_mib: usize,
+    /// Whether guest_memfd should be used to back normal guest memory. If this is enabled
+    /// and any devices are attached to the VM, userspace bounce buffers will be used
+    /// as I/O into secret free memory is not possible.
+    #[serde(default)]
+    pub secret_free: bool,
     /// Enables or disabled SMT.
     #[serde(default)]
     pub smt: bool,
@@ -153,6 +156,7 @@ impl Default for MachineConfig {
         Self {
             vcpu_count: 1,
             mem_size_mib: DEFAULT_MEM_SIZE_MIB,
+            secret_free: false,
             smt: false,
             cpu_template: None,
             track_dirty_pages: false,
@@ -178,6 +182,9 @@ pub struct MachineConfigUpdate {
     /// The memory size in MiB.
     #[serde(default)]
     pub mem_size_mib: Option<usize>,
+    /// Whether secret freedom should be enabled
+    #[serde(default)]
+    pub secret_free: Option<bool>,
     /// Enables or disabled SMT.
     #[serde(default)]
     pub smt: Option<bool>,
@@ -210,6 +217,7 @@ impl From<MachineConfig> for MachineConfigUpdate {
         MachineConfigUpdate {
             vcpu_count: Some(cfg.vcpu_count),
             mem_size_mib: Some(cfg.mem_size_mib),
+            secret_free: Some(cfg.secret_free),
             smt: Some(cfg.smt),
             cpu_template: cfg.static_template(),
             track_dirty_pages: Some(cfg.track_dirty_pages),
@@ -263,11 +271,27 @@ impl MachineConfig {
 
         let mem_size_mib = update.mem_size_mib.unwrap_or(self.mem_size_mib);
         let page_config = update.huge_pages.unwrap_or(self.huge_pages);
+        let secret_free = update.secret_free.unwrap_or(self.secret_free);
+        let track_dirty_pages = update.track_dirty_pages.unwrap_or(self.track_dirty_pages);
 
         if mem_size_mib == 0 || !page_config.is_valid_mem_size(mem_size_mib) {
             return Err(MachineConfigError::InvalidMemorySize);
         }
 
+        if secret_free && page_config != HugePageConfig::None {
+            return Err(MachineConfigError::Incompatible(
+                "secret freedom",
+                "huge pages",
+            ));
+        }
+
+        if secret_free && track_dirty_pages {
+            return Err(MachineConfigError::Incompatible(
+                "secret freedom",
+                "diff snapshots",
+            ));
+        }
+
         let cpu_template = match update.cpu_template {
             None => self.cpu_template.clone(),
             Some(StaticCpuTemplate::None) => None,
@@ -277,9 +301,10 @@ impl MachineConfig {
         Ok(MachineConfig {
             vcpu_count,
             mem_size_mib,
+            secret_free,
             smt,
             cpu_template,
-            track_dirty_pages: update.track_dirty_pages.unwrap_or(self.track_dirty_pages),
+            track_dirty_pages,
             huge_pages: page_config,
             #[cfg(feature = "gdb")]
             gdb_socket_path: update.gdb_socket_path.clone(),
@@ -290,7 +315,126 @@ impl MachineConfig {
 #[cfg(test)]
 mod tests {
     use crate::cpu_config::templates::{CpuTemplateType, CustomCpuTemplate, StaticCpuTemplate};
-    use crate::vmm_config::machine_config::MachineConfig;
+    use crate::vmm_config::machine_config::{
+        HugePageConfig, MachineConfig, MachineConfigError, MachineConfigUpdate,
+    };
+
+    #[test]
+    #[allow(unused)] // some assertions exist only on specific architectures.
+    fn test_machine_config_update() {
+        let mconf = MachineConfig::default();
+
+        // Assert that the default machine config is valid
+        assert_eq!(
+            mconf
+                .update(&MachineConfigUpdate::from(mconf.clone()))
+                .unwrap(),
+            mconf
+        );
+
+        // Invalid vCPU counts
+        let res = mconf.update(&MachineConfigUpdate {
+            vcpu_count: Some(0),
+            ..Default::default()
+        });
+        assert_eq!(res, Err(MachineConfigError::InvalidVcpuCount));
+
+        let res = mconf.update(&MachineConfigUpdate {
+            vcpu_count: Some(33),
+            ..Default::default()
+        });
+        assert_eq!(res, Err(MachineConfigError::InvalidVcpuCount));
+
+        // Invalid memory size
+        let res = mconf.update(&MachineConfigUpdate {
+            mem_size_mib: Some(0),
+            ..Default::default()
+        });
+        assert_eq!(res, Err(MachineConfigError::InvalidMemorySize));
+
+        // Memory Size incompatible with huge page configuration
+        let res = mconf.update(&MachineConfigUpdate {
+            mem_size_mib: Some(31),
+            huge_pages: Some(HugePageConfig::Hugetlbfs2M),
+            ..Default::default()
+        });
+        assert_eq!(res, Err(MachineConfigError::InvalidMemorySize));
+
+        // works if the memory size is a multiple of huge page size indeed
+        let updated = mconf
+            .update(&MachineConfigUpdate {
+                mem_size_mib: Some(32),
+                huge_pages: Some(HugePageConfig::Hugetlbfs2M),
+                ..Default::default()
+            })
+            .unwrap();
+        assert_eq!(updated.huge_pages, HugePageConfig::Hugetlbfs2M);
+        assert_eq!(updated.mem_size_mib, 32);
+
+        let res = mconf.update(&MachineConfigUpdate {
+            huge_pages: Some(HugePageConfig::Hugetlbfs2M),
+            secret_free: Some(true),
+            ..Default::default()
+        });
+        assert_eq!(
+            res,
+            Err(MachineConfigError::Incompatible(
+                "secret freedom",
+                "huge pages"
+            ))
+        );
+
+        let res = mconf.update(&MachineConfigUpdate {
+            track_dirty_pages: Some(true),
+            secret_free: Some(true),
+            ..Default::default()
+        });
+        assert_eq!(
+            res,
+            Err(MachineConfigError::Incompatible(
+                "secret freedom",
+                "diff snapshots"
+            ))
+        );
+    }
+
+    #[test]
+    #[cfg(target_arch = "aarch64")]
+    fn test_machine_config_update_aarch64() {
+        let mconf = MachineConfig::default();
+
+        // Check that SMT is not supported on aarch64
+        let res = mconf.update(&MachineConfigUpdate {
+            smt: Some(true),
+            ..Default::default()
+        });
+        assert_eq!(res, Err(MachineConfigError::SmtNotSupported));
+    }
+
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn test_machine_config_update_x86_64() {
+        let mconf = MachineConfig::default();
+
+        // Test that SMT requires an even vcpu count
+        let res = mconf.update(&MachineConfigUpdate {
+            vcpu_count: Some(3),
+            smt: Some(true),
+            ..Default::default()
+        });
+        assert_eq!(res, Err(MachineConfigError::InvalidVcpuCount));
+
+        // Works if the vcpu count is even indeed
+        let updated = mconf
+            .update(&MachineConfigUpdate {
+                vcpu_count: Some(32),
+                smt: Some(true),
+                ..Default::default()
+            })
+            .unwrap();
+        assert_eq!(updated.vcpu_count, 32);
+        assert!(updated.smt);
+    }
 
     // Ensure the special (de)serialization logic for the cpu_template field works:
     // only static cpu templates can be specified via the machine-config endpoint, but
diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs
index 38ee7cc2ce6..2e547131958 100644
--- a/src/vmm/src/vstate/memory.rs
+++ b/src/vmm/src/vstate/memory.rs
@@ -6,7 +6,9 @@
 // found in the THIRD-PARTY file.
 
 use std::fs::File;
-use std::io::SeekFrom;
+use std::io::{Read, Seek, SeekFrom, Write};
+use std::os::fd::AsRawFd;
+use std::ptr::null_mut;
 use std::sync::Arc;
 
 use serde::{Deserialize, Serialize};
@@ -17,7 +19,10 @@ pub use vm_memory::{
     Address, ByteValued, Bytes, FileOffset, GuestAddress, GuestMemory, GuestMemoryRegion,
     GuestUsize, MemoryRegionAddress, MmapRegion, address,
 };
-use vm_memory::{Error as VmMemoryError, GuestMemoryError, WriteVolatile};
+use vm_memory::{
+    Error as VmMemoryError, GuestMemoryError, ReadVolatile, VolatileMemoryError, VolatileSlice,
+    WriteVolatile,
+};
 use vmm_sys_util::errno;
 
 use crate::DirtyBitmap;
@@ -48,6 +53,144 @@ pub enum MemoryError {
     MemfdSetLen(std::io::Error),
     /// Total sum of memory regions exceeds largest possible file offset
     OffsetTooLarge,
+    /// Error calling mmap: {0}
+    Mmap(std::io::Error),
+}
+
+/// Newtype that implements [`ReadVolatile`] and [`WriteVolatile`] if `T` implements `Read` or
+/// `Write` respectively, by reading/writing using a bounce buffer, and memcpy-ing into the
+/// [`VolatileSlice`].
+///
+/// Bounce buffers are allocated on the heap, as on-stack bounce buffers could cause stack
+/// overflows. If `N == 0` then bounce buffers will be allocated on demand.
+#[derive(Debug)]
+pub struct MaybeBounce<T, const N: usize = 0> {
+    pub(crate) target: T,
+    persistent_buffer: Option<Box<[u8; N]>>,
+}
+
+impl<T> MaybeBounce<T, 0> {
+    /// Creates a new `MaybeBounce` that always allocates a bounce
+    /// buffer on-demand
+    pub fn new(target: T, should_bounce: bool) -> Self {
+        MaybeBounce::new_persistent(target, should_bounce)
+    }
+}
+
+impl<T, const N: usize> MaybeBounce<T, N> {
+    /// Creates a new `MaybeBounce` that uses a persistent, fixed size bounce buffer
+    /// of size `N`. If a read/write request exceeds the size of this bounce buffer, it
+    /// is split into multiple, `<= N`-size read/writes.
+    pub fn new_persistent(target: T, should_bounce: bool) -> Self {
+        let mut bounce = MaybeBounce {
+            target,
+            persistent_buffer: None,
+        };
+
+        if should_bounce {
+            bounce.activate()
+        }
+
+        bounce
+    }
+
+    /// Activates this [`MaybeBounce`] to start doing reads/writes via a bounce buffer,
+    /// which is allocated on the heap by this function (e.g. if `activate()` is never called,
+    /// no bounce buffer is ever allocated).
+    pub fn activate(&mut self) {
+        self.persistent_buffer = Some(vec![0u8; N].into_boxed_slice().try_into().unwrap())
+    }
+
+    /// Returns `true` if this `MaybeBounce` is actually bouncing buffers.
+    pub fn is_activated(&self) -> bool {
+        self.persistent_buffer.is_some()
+    }
+}
+
+impl<T: ReadVolatile, const N: usize> ReadVolatile for MaybeBounce<T, N> {
+    fn read_volatile<B: BitmapSlice>(
+        &mut self,
+        buf: &mut VolatileSlice<B>,
+    ) -> Result<usize, VolatileMemoryError> {
+        if let Some(ref mut persistent) = self.persistent_buffer {
+            let mut bbuf = (N == 0).then(|| vec![0u8; buf.len()]);
+            let bbuf = bbuf.as_deref_mut().unwrap_or(persistent.as_mut_slice());
+
+            let mut buf = buf.offset(0)?;
+            let mut total = 0;
+            while !buf.is_empty() {
+                let how_much = buf.len().min(bbuf.len());
+                let n = self
+                    .target
+                    .read_volatile(&mut VolatileSlice::from(&mut bbuf[..how_much]))?;
+                buf.copy_from(&bbuf[..n]);
+
+                buf = buf.offset(n)?;
+                total += n;
+
+                if n < how_much {
+                    break;
+                }
+            }
+
+            Ok(total)
+        } else {
+            self.target.read_volatile(buf)
+        }
+    }
+}
+
+impl<T: WriteVolatile, const N: usize> WriteVolatile for MaybeBounce<T, N> {
+    fn write_volatile<B: BitmapSlice>(
+        &mut self,
+        buf: &VolatileSlice<B>,
+    ) -> Result<usize, VolatileMemoryError> {
+        if let Some(ref mut persistent) = self.persistent_buffer {
+            let mut bbuf = (N == 0).then(|| vec![0u8; buf.len()]);
+            let bbuf = bbuf.as_deref_mut().unwrap_or(persistent.as_mut_slice());
+
+            let mut buf = buf.offset(0)?;
+            let mut total = 0;
+            while !buf.is_empty() {
+                let how_much = buf.copy_to(bbuf);
+                let n = self
+                    .target
+                    .write_volatile(&VolatileSlice::from(&mut bbuf[..how_much]))?;
+                buf = buf.offset(n)?;
+                total += n;
+
+                if n < how_much {
+                    break;
+                }
+            }
+
+            Ok(total)
+        } else {
+            self.target.write_volatile(buf)
+        }
+    }
+}
+
+impl<R: Read, const N: usize> Read for MaybeBounce<R, N> {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        self.target.read(buf)
+    }
+}
+
+impl<W: Write, const N: usize> Write for MaybeBounce<W, N> {
+    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        self.target.write(buf)
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        self.target.flush()
+    }
+}
+
+impl<S: Seek, const N: usize> Seek for MaybeBounce<S, N> {
+    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
+        self.target.seek(pos)
+    }
 }
 
 /// Creates a `Vec` of `GuestRegionMmap` with the given configuration
@@ -64,16 +207,40 @@ pub fn create(
             let mut builder = MmapRegionBuilder::new_with_bitmap(
                 size,
                 track_dirty_pages.then(|| AtomicBitmap::with_len(size)),
-            )
-            .with_mmap_prot(libc::PROT_READ | libc::PROT_WRITE)
-            .with_mmap_flags(libc::MAP_NORESERVE | mmap_flags);
+            );
 
-            if let Some(ref file) = file {
+            // when computing offset below we ensure it fits into i64
+            #[allow(clippy::cast_possible_wrap)]
+            let (fd, fd_off) = if let Some(ref file) = file {
                 let file_offset = FileOffset::from_arc(Arc::clone(file), offset);
 
                 builder = builder.with_file_offset(file_offset);
+
+                (file.as_raw_fd(), offset as libc::off_t)
+            } else {
+                (-1, 0)
+            };
+
+            // SAFETY: the arguments to mmap cannot cause any memory unsafety in the rust sense
+            let ptr = unsafe {
+                libc::mmap(
+                    null_mut(),
+                    size,
+                    libc::PROT_READ | libc::PROT_WRITE,
+                    libc::MAP_NORESERVE | mmap_flags,
+                    fd,
+                    fd_off,
+                )
+            };
+
+            if ptr == libc::MAP_FAILED {
+                return Err(MemoryError::Mmap(std::io::Error::last_os_error()));
             }
 
+            // SAFETY: we check above that mmap succeeded, and the size we passed to builder is the
+            // same as the size of the mmap area.
+            let builder = unsafe { builder.with_raw_mmap_pointer(ptr.cast()) };
+
             offset = match offset.checked_add(size as u64) {
                 None => return Err(MemoryError::OffsetTooLarge),
                 Some(new_off) if new_off >= i64::MAX as u64 => {
@@ -92,18 +259,16 @@ pub fn create(
 }
 
 /// Creates a GuestMemoryMmap with `size` in MiB backed by a memfd.
-pub fn memfd_backed(
-    regions: &[(GuestAddress, usize)],
+pub fn file_shared(
+    file: File,
+    regions: impl Iterator<Item = (GuestAddress, usize)>,
     track_dirty_pages: bool,
     huge_pages: HugePageConfig,
 ) -> Result<Vec<GuestRegionMmap>, MemoryError> {
-    let size = regions.iter().map(|&(_, size)| size as u64).sum();
-    let memfd_file = create_memfd(size, huge_pages.into())?.into_file();
-
     create(
-        regions.iter().copied(),
+        regions,
         libc::MAP_SHARED | huge_pages.mmap_flags(),
-        Some(memfd_file),
+        Some(file),
         track_dirty_pages,
     )
 }
@@ -124,7 +289,7 @@ pub fn anonymous(
 
 /// Creates a GuestMemoryMmap given a `file` containing the data
 /// and a `state` containing mapping information.
-pub fn snapshot_file(
+pub fn file_private(
     file: File,
     regions: impl Iterator<Item = (GuestAddress, usize)>,
     track_dirty_pages: bool,
@@ -158,6 +323,12 @@ where
 
     /// Store the dirty bitmap in internal store
     fn store_dirty_bitmap(&self, dirty_bitmap: &DirtyBitmap, page_size: usize);
+
+    /// Convert guest physical address to file offset
+    fn gpa_to_offset(&self, gpa: GuestAddress) -> Option<u64>;
+
+    /// Convert file offset to guest physical address
+    fn offset_to_gpa(&self, offset: u64) -> Option<GuestAddress>;
 }
 
 /// State of a guest memory region saved to file/buffer.
@@ -308,9 +479,38 @@ impl GuestMemoryExtension for GuestMemoryMmap {
             }
         });
     }
+
+    /// Convert guest physical address to file offset
+    fn gpa_to_offset(&self, gpa: GuestAddress) -> Option<u64> {
+        self.find_region(gpa).and_then(|r| {
+            r.file_offset()
+                .map(|file_offset| gpa.0 - r.start_addr().0 + file_offset.start())
+        })
+    }
+
+    /// Convert file offset to guest physical address
+    fn offset_to_gpa(&self, offset: u64) -> Option<GuestAddress> {
+        self.iter().find_map(|region| {
+            if let Some(reg_offset) = region.file_offset() {
+                let region_start = reg_offset.start();
+                let region_size = region.size();
+
+                if offset >= region_start && offset < region_start + region_size as u64 {
+                    Some(GuestAddress(
+                        region.start_addr().0 + (offset - region_start),
+                    ))
+                } else {
+                    None
+                }
+            } else {
+                None
+            }
+        })
+    }
 }
 
-fn create_memfd(
+/// Creates a memfd of the given size and huge pages configuration
+pub fn create_memfd(
     mem_size: u64,
     hugetlb_size: Option<memfd::HugetlbSize>,
 ) -> Result<memfd::Memfd, MemoryError> {
@@ -346,6 +546,7 @@ mod tests {
 
     use std::collections::HashMap;
     use std::io::{Read, Seek};
+    use std::os::fd::AsFd;
 
     use vmm_sys_util::tempfile::TempFile;
 
@@ -567,7 +768,7 @@ mod tests {
         guest_memory.dump(&mut memory_file).unwrap();
 
         let restored_guest_memory = GuestMemoryMmap::from_regions(
-            snapshot_file(memory_file, memory_state.regions(), false).unwrap(),
+            file_private(memory_file, memory_state.regions(), false).unwrap(),
         )
         .unwrap();
 
@@ -629,7 +830,7 @@ mod tests {
 
         // We can restore from this because this is the first dirty dump.
         let restored_guest_memory = GuestMemoryMmap::from_regions(
-            snapshot_file(file, memory_state.regions(), false).unwrap(),
+            file_private(file, memory_state.regions(), false).unwrap(),
         )
         .unwrap();
 
@@ -726,4 +927,50 @@ mod tests {
         seals.insert(memfd::FileSeal::SealGrow);
         memfd.add_seals(&seals).unwrap_err();
     }
+
+    #[test]
+    fn test_bounce() {
+        let file_direct = TempFile::new().unwrap();
+        let file_bounced = TempFile::new().unwrap();
+        let file_persistent_bounced = TempFile::new().unwrap();
+
+        let mut data = (0..=255).collect::<Vec<_>>();
+
+        MaybeBounce::new(file_direct.as_file().as_fd(), false)
+            .write_all_volatile(&VolatileSlice::from(data.as_mut_slice()))
+            .unwrap();
+        MaybeBounce::new(file_bounced.as_file().as_fd(), true)
+            .write_all_volatile(&VolatileSlice::from(data.as_mut_slice()))
+            .unwrap();
+        MaybeBounce::<_, 7>::new_persistent(file_persistent_bounced.as_file().as_fd(), true)
+            .write_all_volatile(&VolatileSlice::from(data.as_mut_slice()))
+            .unwrap();
+
+        let mut data_direct = vec![0u8; 256];
+        let mut data_bounced = vec![0u8; 256];
+        let mut data_persistent_bounced = vec![0u8; 256];
+
+        file_direct.as_file().seek(SeekFrom::Start(0)).unwrap();
+        file_bounced.as_file().seek(SeekFrom::Start(0)).unwrap();
+        file_persistent_bounced
+            .as_file()
+            .seek(SeekFrom::Start(0))
+            .unwrap();
+
+        MaybeBounce::new(file_direct.as_file().as_fd(), false)
+            .read_exact_volatile(&mut VolatileSlice::from(data_direct.as_mut_slice()))
+            .unwrap();
+        MaybeBounce::new(file_bounced.as_file().as_fd(), true)
+            .read_exact_volatile(&mut VolatileSlice::from(data_bounced.as_mut_slice()))
+            .unwrap();
+        MaybeBounce::<_, 7>::new_persistent(file_persistent_bounced.as_file().as_fd(), true)
+            .read_exact_volatile(&mut VolatileSlice::from(
+                data_persistent_bounced.as_mut_slice(),
+            ))
+            .unwrap();
+
+        assert_eq!(data_direct, data_bounced);
+        assert_eq!(data_direct, data);
+        assert_eq!(data_persistent_bounced, data);
+    }
 }
diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs
index 642b2fd2352..9a25c0e4eb4 100644
--- a/src/vmm/src/vstate/vcpu.rs
+++ b/src/vmm/src/vstate/vcpu.rs
@@ -10,7 +10,7 @@ use std::cell::RefCell;
 use std::os::fd::AsRawFd;
 use std::sync::atomic::{Ordering, fence};
 use std::sync::mpsc::{Receiver, Sender, TryRecvError, channel};
-use std::sync::{Arc, Barrier};
+use std::sync::{Arc, Barrier, Condvar, Mutex};
 use std::{fmt, io, thread};
 
 use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
@@ -31,11 +31,15 @@ use crate::logger::{IncMetric, METRICS};
 use crate::seccomp::{BpfProgram, BpfProgramRef};
 use crate::utils::signal::{Killable, register_signal_handler, sigrtmin};
 use crate::utils::sm::StateMachine;
-use crate::vstate::vm::Vm;
+use crate::vstate::vm::{UserfaultData, Vm};
 
 /// Signal number (SIGRTMIN) used to kick Vcpus.
 pub const VCPU_RTSIG_OFFSET: i32 = 0;
 
+// TODO: remove when KVM userfault support is merged upstream.
+/// VM exit due to a userfault.
+const KVM_MEMORY_EXIT_FLAG_USERFAULT: u64 = 1 << 4;
+
 /// Errors associated with the wrappers over KVM ioctls.
 #[derive(Debug, thiserror::Error, displaydoc::Display)]
 pub enum VcpuError {
@@ -85,6 +89,8 @@ pub enum CopyKvmFdError {
     CreateVcpuError(#[from] kvm_ioctls::Error),
 }
 
+type UserfaultResolved = Arc<(Mutex<bool>, Condvar)>;
+
 // Stores the mmap region of `kvm_run` struct for the current Vcpu. This allows for the
 // signal handler to safely access the `kvm_run` even when Vcpu is dropped and vcpu fd
 // is closed.
@@ -109,6 +115,8 @@ pub struct Vcpu {
     response_receiver: Option<Receiver<VcpuResponse>>,
     /// The transmitting end of the responses channel owned by the vcpu side.
     response_sender: Sender<VcpuResponse>,
+    /// A condvar to notify the vCPU that a userfault has been resolved
+    userfault_resolved: Option<UserfaultResolved>,
 }
 
 impl Vcpu {
@@ -156,7 +164,14 @@ impl Vcpu {
     /// * `index` - Represents the 0-based CPU index between [0, max vcpus).
     /// * `vm` - The vm to which this vcpu will get attached.
     /// * `exit_evt` - An `EventFd` that will be written into when this vcpu exits.
-    pub fn new(index: u8, vm: &Vm, exit_evt: EventFd) -> Result<Self, VcpuError> {
+    /// * `userfault_resolved` - An optional condvar that will get active when a userfault is
+    ///   resolved.
+    pub fn new(
+        index: u8,
+        vm: &Vm,
+        exit_evt: EventFd,
+        userfault_resolved: Option<UserfaultResolved>,
+    ) -> Result<Self, VcpuError> {
         let (event_sender, event_receiver) = channel();
         let (response_sender, response_receiver) = channel();
         let kvm_vcpu = KvmVcpu::new(index, vm).unwrap();
@@ -170,6 +185,7 @@ impl Vcpu {
             #[cfg(feature = "gdb")]
             gdb_event: None,
             kvm_vcpu,
+            userfault_resolved,
         })
     }
 
@@ -205,6 +221,7 @@ impl Vcpu {
     ) -> Result<VcpuHandle, StartThreadedError> {
         let event_sender = self.event_sender.take().expect("vCPU already started");
         let response_receiver = self.response_receiver.take().unwrap();
+        let userfault_resolved = self.userfault_resolved.clone();
         let vcpu_thread = thread::Builder::new()
             .name(format!("fc_vcpu {}", self.kvm_vcpu.index))
             .spawn(move || {
@@ -218,6 +235,7 @@ impl Vcpu {
         Ok(VcpuHandle::new(
             event_sender,
             response_receiver,
+            userfault_resolved,
             vcpu_thread,
         ))
     }
@@ -440,6 +458,34 @@ impl Vcpu {
         StateMachine::finish()
     }
 
+    fn handle_userfault(
+        &mut self,
+        userfaultfd_data: UserfaultData,
+    ) -> Result<VcpuEmulation, VcpuError> {
+        self.response_sender
+            .send(VcpuResponse::Userfault(userfaultfd_data))
+            .expect("Failed to send userfault data");
+        self.exit_evt.write(1).expect("Failed to write exit event");
+
+        let (lock, cvar) = self
+            .userfault_resolved
+            .as_deref()
+            .expect("Vcpu::handler_userfault called without userfault_resolved condvar");
+
+        let mut val = lock
+            .lock()
+            .expect("Failed to lock userfault resolved mutex");
+
+        while !*val {
+            val = cvar
+                .wait(val)
+                .expect("Failed to wait on userfault resolved condvar");
+        }
+        *val = false;
+
+        Ok(VcpuEmulation::Handled)
+    }
+
     /// Runs the vCPU in KVM context and handles the kvm exit reason.
     ///
     /// Returns error or enum specifying whether emulation was handled or interrupted.
@@ -456,6 +502,16 @@ impl Vcpu {
                 // Notify that this KVM_RUN was interrupted.
                 Ok(VcpuEmulation::Interrupted)
             }
+            Ok(VcpuExit::MemoryFault { flags, gpa, size }) => {
+                if flags & KVM_MEMORY_EXIT_FLAG_USERFAULT == 0 {
+                    Err(VcpuError::UnhandledKvmExit(format!(
+                        "flags {:x} gpa {:x} size {:x}",
+                        flags, gpa, size
+                    )))
+                } else {
+                    self.handle_userfault(UserfaultData { flags, gpa, size })
+                }
+            }
             #[cfg(feature = "gdb")]
             Ok(VcpuExit::Debug(_)) => {
                 if let Some(gdb_event) = &self.gdb_event {
@@ -606,6 +662,8 @@ pub enum VcpuResponse {
     SavedState(Box<VcpuState>),
     /// Vcpu is in the state where CPU config is dumped.
     DumpedCpuConfig(Box<CpuConfiguration>),
+    /// Vcpu exited due to a userfault
+    Userfault(UserfaultData),
 }
 
 impl fmt::Debug for VcpuResponse {
@@ -619,6 +677,9 @@ impl fmt::Debug for VcpuResponse {
             Error(err) => write!(f, "VcpuResponse::Error({:?})", err),
             NotAllowed(reason) => write!(f, "VcpuResponse::NotAllowed({})", reason),
             DumpedCpuConfig(_) => write!(f, "VcpuResponse::DumpedCpuConfig"),
+            Userfault(userfault_data) => {
+                write!(f, "VcpuResponse::Userfault({:?})", userfault_data)
+            }
         }
     }
 }
@@ -628,6 +689,7 @@ impl fmt::Debug for VcpuResponse {
 pub struct VcpuHandle {
     event_sender: Sender<VcpuEvent>,
     response_receiver: Receiver<VcpuResponse>,
+    userfault_resolved: Option<UserfaultResolved>,
     // Rust JoinHandles have to be wrapped in Option if you ever plan on 'join()'ing them.
     // We want to be able to join these threads in tests.
     vcpu_thread: Option<thread::JoinHandle<()>>,
@@ -644,15 +706,19 @@ impl VcpuHandle {
     /// # Arguments
     /// + `event_sender`: [`Sender`] to communicate [`VcpuEvent`] to control the vcpu.
     /// + `response_received`: [`Received`] from which the vcpu's responses can be read.
+    /// + `userfault_resolved`: An optional condvar to notify the vcpu that a userfault has been
+    ///   resolved.
     /// + `vcpu_thread`: A [`JoinHandle`] for the vcpu thread.
     pub fn new(
         event_sender: Sender<VcpuEvent>,
         response_receiver: Receiver<VcpuResponse>,
+        userfault_resolved: Option<UserfaultResolved>,
         vcpu_thread: thread::JoinHandle<()>,
     ) -> Self {
         Self {
             event_sender,
             response_receiver,
+            userfault_resolved,
             vcpu_thread: Some(vcpu_thread),
         }
     }
@@ -675,6 +741,20 @@ impl VcpuHandle {
         Ok(())
     }
 
+    /// Sends "userfault resolved" event to vCPU.
+    pub fn send_userfault_resolved(&self) {
+        let (lock, cvar) = self.userfault_resolved.as_deref().expect(
+            "VcpuHandle::send_userfault_resolved called without userfault_resolved condvar",
+        );
+
+        let mut val = lock
+            .lock()
+            .expect("Failed to lock userfault resolved mutex");
+
+        *val = true;
+        cvar.notify_one();
+    }
+
     /// Returns a reference to the [`Received`] from which the vcpu's responses can be read.
     pub fn response_receiver(&self) -> &Receiver<VcpuResponse> {
         &self.response_receiver
@@ -704,7 +784,6 @@ pub enum VcpuEmulation {
     Interrupted,
     /// Stopped.
     Stopped,
-    /// Pause request
     #[cfg(feature = "gdb")]
     Paused,
 }
@@ -863,6 +942,7 @@ pub(crate) mod tests {
             match self {
                 Paused | Resumed | Exited(_) => (),
                 Error(_) | NotAllowed(_) | SavedState(_) | DumpedCpuConfig(_) => (),
+                Userfault(_) => (),
             };
             match (self, other) {
                 (Paused, Paused) | (Resumed, Resumed) => true,
@@ -883,7 +963,7 @@ pub(crate) mod tests {
     pub(crate) fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, Vcpu) {
         let (kvm, mut vm) = setup_vm_with_memory(mem_size);
 
-        let (mut vcpus, _) = vm.create_vcpus(1).unwrap();
+        let (mut vcpus, _) = vm.create_vcpus(1, false).unwrap();
         let mut vcpu = vcpus.remove(0);
 
         #[cfg(target_arch = "aarch64")]
diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs
index deef6710b90..c8691a98317 100644
--- a/src/vmm/src/vstate/vm.rs
+++ b/src/vmm/src/vstate/vm.rs
@@ -6,27 +6,30 @@
 // found in the THIRD-PARTY file.
 
 use std::collections::HashMap;
-use std::fs::OpenOptions;
+use std::fs::{File, OpenOptions};
 use std::io::Write;
+use std::os::fd::{AsFd, AsRawFd, FromRawFd};
 use std::path::Path;
 use std::sync::atomic::{AtomicBool, Ordering};
-use std::sync::{Arc, Mutex, MutexGuard};
+use std::sync::{Arc, Condvar, Mutex, MutexGuard};
 
 #[cfg(target_arch = "x86_64")]
 use kvm_bindings::KVM_IRQCHIP_IOAPIC;
 use kvm_bindings::{
-    KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, KVM_MEM_LOG_DIRTY_PAGES, KVM_MSI_VALID_DEVID,
-    KvmIrqRouting, kvm_irq_routing_entry, kvm_userspace_memory_region,
+    KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES,
+    KVM_MSI_VALID_DEVID, KVMIO, KvmIrqRouting, kvm_create_guest_memfd, kvm_irq_routing_entry,
+    kvm_userspace_memory_region,
 };
-use kvm_ioctls::VmFd;
+use kvm_ioctls::{Cap, VmFd};
 use log::{debug, error};
 use pci::DeviceRelocation;
 use serde::{Deserialize, Serialize};
 use vm_device::interrupt::{
     InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig,
 };
-use vmm_sys_util::errno;
 use vmm_sys_util::eventfd::EventFd;
+use vmm_sys_util::ioctl::ioctl_with_ref;
+use vmm_sys_util::{errno, ioctl_iow_nr};
 
 pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState};
 use crate::arch::{GSI_MSI_END, host_page_size};
@@ -36,12 +39,27 @@ use crate::snapshot::Persist;
 use crate::utils::u64_to_usize;
 use crate::vmm_config::snapshot::SnapshotType;
 use crate::vstate::memory::{
-    Address, GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion, GuestRegionMmap,
+    Address, GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion,
+    GuestRegionMmap, MaybeBounce,
 };
 use crate::vstate::resources::ResourceAllocator;
 use crate::vstate::vcpu::VcpuError;
 use crate::{DirtyBitmap, Vcpu, mem_size_mib};
 
+pub(crate) const GUEST_MEMFD_FLAG_MMAP: u64 = 1;
+pub(crate) const GUEST_MEMFD_FLAG_NO_DIRECT_MAP: u64 = 2;
+
+/// KVM userfault information
+#[derive(Copy, Clone, Default, Eq, PartialEq, Debug)]
+pub struct UserfaultData {
+    /// Flags
+    pub flags: u64,
+    /// Guest physical address
+    pub gpa: u64,
+    /// Size
+    pub size: u64,
+}
+
 #[derive(Debug, thiserror::Error, displaydoc::Display)]
 /// Errors related with Firecracker interrupts
 pub enum InterruptError {
@@ -249,6 +267,7 @@ pub struct VmCommon {
     pub resource_allocator: Mutex<ResourceAllocator>,
     /// MMIO bus
     pub mmio_bus: Arc<vm_device::Bus>,
+    secret_free: bool,
 }
 
 /// Errors associated with the wrappers over KVM ioctls.
@@ -275,13 +294,42 @@ pub enum VmError {
     /// Error calling mincore: {0}
     Mincore(vmm_sys_util::errno::Error),
     /// ResourceAllocator error: {0}
-    ResourceAllocator(#[from] vm_allocator::Error)
+    ResourceAllocator(#[from] vm_allocator::Error),
+    /// Failure to create guest_memfd: {0}
+    GuestMemfd(kvm_ioctls::Error),
+    /// guest_memfd is not supported on this host kernel.
+    GuestMemfdNotSupported,
+}
+
+// Upstream `kvm_userspace_memory_region2` definition does not include `userfault_bitmap` field yet.
+// TODO: revert to `kvm_userspace_memory_region2` from kvm-bindings
+#[allow(non_camel_case_types)]
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, PartialEq)]
+struct kvm_userspace_memory_region2 {
+    slot: u32,
+    flags: u32,
+    guest_phys_addr: u64,
+    memory_size: u64,
+    userspace_addr: u64,
+    guest_memfd_offset: u64,
+    guest_memfd: u32,
+    pad1: u32,
+    userfault_bitmap: u64,
+    pad2: [u64; 13],
 }
 
 /// Contains Vm functions that are usable across CPU architectures
 impl Vm {
     /// Create a KVM VM
-    pub fn create_common(kvm: &crate::vstate::kvm::Kvm) -> Result<VmCommon, VmError> {
+    pub fn create_common(
+        kvm: &crate::vstate::kvm::Kvm,
+        secret_free: bool,
+    ) -> Result<VmCommon, VmError> {
+        if secret_free && !kvm.fd.check_extension(Cap::GuestMemfd) {
+            return Err(VmError::GuestMemfdNotSupported);
+        }
+
         // It is known that KVM_CREATE_VM occasionally fails with EINTR on heavily loaded machines
         // with many VMs.
         //
@@ -305,7 +353,9 @@ impl Vm {
         const MAX_ATTEMPTS: u32 = 5;
         let mut attempt = 1;
         let fd = loop {
-            match kvm.fd.create_vm() {
+            let create_result = kvm.fd.create_vm();
+
+            match create_result {
                 Ok(fd) => break fd,
                 Err(e) if e.errno() == libc::EINTR && attempt < MAX_ATTEMPTS => {
                     info!("Attempt #{attempt} of KVM_CREATE_VM returned EINTR");
@@ -325,13 +375,18 @@ impl Vm {
             interrupts: Mutex::new(HashMap::with_capacity(GSI_MSI_END as usize + 1)),
             resource_allocator: Mutex::new(ResourceAllocator::new()),
             mmio_bus: Arc::new(vm_device::Bus::new()),
+            secret_free,
         })
     }
 
     /// Creates the specified number of [`Vcpu`]s.
     ///
     /// The returned [`EventFd`] is written to whenever any of the vcpus exit.
-    pub fn create_vcpus(&mut self, vcpu_count: u8) -> Result<(Vec<Vcpu>, EventFd), VmError> {
+    pub fn create_vcpus(
+        &mut self,
+        vcpu_count: u8,
+        secret_free: bool,
+    ) -> Result<(Vec<Vcpu>, EventFd), VmError> {
         self.arch_pre_create_vcpus(vcpu_count)?;
 
         let exit_evt = EventFd::new(libc::EFD_NONBLOCK).map_err(VmError::EventFd)?;
@@ -339,7 +394,14 @@ impl Vm {
         let mut vcpus = Vec::with_capacity(vcpu_count as usize);
         for cpu_idx in 0..vcpu_count {
             let exit_evt = exit_evt.try_clone().map_err(VmError::EventFd)?;
-            let vcpu = Vcpu::new(cpu_idx, self, exit_evt).map_err(VmError::CreateVcpu)?;
+            let userfault_resolved = if secret_free {
+                Some(Arc::new((Mutex::new(false), Condvar::new())))
+            } else {
+                None
+            };
+
+            let vcpu = Vcpu::new(cpu_idx, self, exit_evt, userfault_resolved)
+                .map_err(VmError::CreateVcpu)?;
             vcpus.push(vcpu);
         }
 
@@ -348,20 +410,87 @@ impl Vm {
         Ok((vcpus, exit_evt))
     }
 
+    /// Create a guest_memfd of the specified size
+    pub fn create_guest_memfd(&self, size: usize, flags: u64) -> Result<File, VmError> {
+        assert_eq!(
+            size & (host_page_size() - 1),
+            0,
+            "guest_memfd size must be page aligned"
+        );
+
+        let kvm_gmem = kvm_create_guest_memfd {
+            size: size as u64,
+            flags,
+            ..Default::default()
+        };
+
+        self.fd()
+            .create_guest_memfd(kvm_gmem)
+            .map_err(VmError::GuestMemfd)
+            // SAFETY: We know rawfd is a valid fd because create_guest_memfd didn't return an
+            // error.
+            .map(|rawfd| unsafe { File::from_raw_fd(rawfd) })
+    }
+
     /// Register a list of new memory regions to this [`Vm`].
     pub fn register_memory_regions(
         &mut self,
         regions: Vec<GuestRegionMmap>,
+        mut userfault_bitmap: Option<&mut [u8]>,
     ) -> Result<(), VmError> {
         for region in regions {
-            self.register_memory_region(region)?
+            let bitmap_slice = if let Some(remaining) = userfault_bitmap {
+                let region_len = u64_to_usize(region.len());
+                // Firecracker does not allow sub-MB granularity when allocating guest memory
+                assert_eq!(region_len % (host_page_size() * u8::BITS as usize), 0);
+                let bitmap_len = region_len / host_page_size() / (u8::BITS as usize);
+                let (head, tail) = remaining.split_at_mut(bitmap_len);
+                userfault_bitmap = Some(tail);
+                Some(head)
+            } else {
+                None
+            };
+            self.register_memory_region(region, bitmap_slice)?
         }
-
         Ok(())
     }
 
+    // TODO: remove when userfault support is merged upstream
+    fn set_user_memory_region2(
+        &self,
+        user_memory_region2: kvm_userspace_memory_region2,
+    ) -> Result<(), VmError> {
+        ioctl_iow_nr!(
+            KVM_SET_USER_MEMORY_REGION2,
+            KVMIO,
+            0x49,
+            kvm_userspace_memory_region2
+        );
+
+        #[allow(clippy::undocumented_unsafe_blocks)]
+        let ret = unsafe {
+            ioctl_with_ref(
+                self.fd(),
+                KVM_SET_USER_MEMORY_REGION2(),
+                &user_memory_region2,
+            )
+        };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(VmError::SetUserMemoryRegion(kvm_ioctls::Error::last()))
+        }
+    }
+
     /// Register a new memory region to this [`Vm`].
-    pub fn register_memory_region(&mut self, region: GuestRegionMmap) -> Result<(), VmError> {
+    pub fn register_memory_region(
+        &mut self,
+        region: GuestRegionMmap,
+        userfault_bitmap: Option<&mut [u8]>,
+    ) -> Result<(), VmError> {
+        // TODO: take it from kvm-bindings when merged upstream
+        const KVM_MEM_USERFAULT: u32 = 1 << 3;
+
         let next_slot = self
             .guest_memory()
             .num_regions()
@@ -371,27 +500,69 @@ impl Vm {
             return Err(VmError::NotEnoughMemorySlots(self.common.max_memslots));
         }
 
-        let flags = if region.bitmap().is_some() {
-            KVM_MEM_LOG_DIRTY_PAGES
+        let mut flags = 0;
+        if region.bitmap().is_some() {
+            flags |= KVM_MEM_LOG_DIRTY_PAGES;
+        }
+
+        #[allow(clippy::cast_sign_loss)]
+        let (guest_memfd, guest_memfd_offset) = if self.secret_free() {
+            flags |= KVM_MEM_GUEST_MEMFD;
+
+            let fo = region
+                .file_offset()
+                .expect("secret hidden VMs must mmap guest_memfd for memslots");
+
+            (fo.file().as_raw_fd() as u32, fo.start())
         } else {
-            0
+            (0, 0)
         };
 
-        let memory_region = kvm_userspace_memory_region {
+        let userfault_bitmap = match userfault_bitmap {
+            Some(addr) => {
+                flags |= KVM_MEM_USERFAULT;
+                addr.as_ptr() as u64
+            }
+            None => 0,
+        };
+
+        let memory_region = kvm_userspace_memory_region2 {
             slot: next_slot,
             guest_phys_addr: region.start_addr().raw_value(),
             memory_size: region.len(),
             userspace_addr: region.as_ptr() as u64,
             flags,
+            guest_memfd,
+            guest_memfd_offset,
+            userfault_bitmap,
+            ..Default::default()
         };
 
         let new_guest_memory = self.common.guest_memory.insert_region(Arc::new(region))?;
 
-        // SAFETY: Safe because the fd is a valid KVM file descriptor.
-        unsafe {
-            self.fd()
-                .set_user_memory_region(memory_region)
-                .map_err(VmError::SetUserMemoryRegion)?;
+        if self.fd().check_extension(Cap::UserMemory2) {
+            self.set_user_memory_region2(memory_region)?;
+        } else {
+            // Something is seriously wrong if we manage to set these fields on a host that doesn't
+            // even allow creation of guest_memfds!
+            assert_eq!(memory_region.guest_memfd, 0);
+            assert_eq!(memory_region.guest_memfd_offset, 0);
+            assert_eq!(memory_region.userfault_bitmap, 0);
+            assert_eq!(memory_region.flags & KVM_MEM_GUEST_MEMFD, 0);
+            assert_eq!(memory_region.flags & KVM_MEM_USERFAULT, 0);
+
+            // SAFETY: We are passing a valid memory region and operate on a valid KVM FD.
+            unsafe {
+                self.fd()
+                    .set_user_memory_region(kvm_userspace_memory_region {
+                        slot: memory_region.slot,
+                        flags: memory_region.flags,
+                        guest_phys_addr: memory_region.guest_phys_addr,
+                        memory_size: memory_region.memory_size,
+                        userspace_addr: memory_region.userspace_addr,
+                    })
+                    .map_err(VmError::SetUserMemoryRegion)?;
+            }
         }
 
         self.common.guest_memory = new_guest_memory;
@@ -399,6 +570,11 @@ impl Vm {
         Ok(())
     }
 
+    /// Whether this VM is secret free
+    pub fn secret_free(&self) -> bool {
+        self.common.secret_free
+    }
+
     /// Gets a reference to the kvm file descriptor owned by this VM.
     pub fn fd(&self) -> &VmFd {
         &self.common.fd
@@ -501,7 +677,11 @@ impl Vm {
                 self.guest_memory().dump_dirty(&mut file, &dirty_bitmap)?;
             }
             SnapshotType::Full => {
-                self.guest_memory().dump(&mut file)?;
+                self.guest_memory()
+                    .dump(&mut MaybeBounce::<_, 4096>::new_persistent(
+                        file.as_fd(),
+                        self.secret_free(),
+                    ))?;
                 self.reset_dirty_bitmap();
                 self.guest_memory().reset_dirty();
             }
@@ -693,7 +873,7 @@ pub(crate) mod tests {
     // Auxiliary function being used throughout the tests.
     pub(crate) fn setup_vm() -> (Kvm, Vm) {
         let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
-        let vm = Vm::new(&kvm).expect("Cannot create new vm");
+        let vm = Vm::new(&kvm, false).expect("Cannot create new vm");
         (kvm, vm)
     }
 
@@ -701,7 +881,7 @@ pub(crate) mod tests {
     pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm) {
         let (kvm, mut vm) = setup_vm();
         let gm = single_region_mem_raw(mem_size);
-        vm.register_memory_regions(gm).unwrap();
+        vm.register_memory_regions(gm, None).unwrap();
         (kvm, vm)
     }
 
@@ -709,7 +889,19 @@ pub(crate) mod tests {
     fn test_new() {
         // Testing with a valid /dev/kvm descriptor.
         let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
-        Vm::new(&kvm).unwrap();
+        Vm::new(&kvm, false).unwrap();
+    }
+
+    #[test]
+    fn test_new_secret_free() {
+        let kvm = Kvm::new(vec![]).unwrap();
+
+        if !kvm.fd.check_extension(Cap::GuestMemfd) {
+            return;
+        }
+
+        Vm::new(&kvm, true)
+            .expect("should be able to create secret free VMs if guest_memfd is supported");
     }
 
     #[test]
@@ -719,14 +911,14 @@ pub(crate) mod tests {
         // Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE
         // will result in error.
         let gm = single_region_mem_raw(0x10);
-        let res = vm.register_memory_regions(gm);
+        let res = vm.register_memory_regions(gm, None);
         assert_eq!(
             res.unwrap_err().to_string(),
             "Cannot set the memory regions: Invalid argument (os error 22)"
         );
 
         let gm = single_region_mem_raw(0x1000);
-        let res = vm.register_memory_regions(gm);
+        let res = vm.register_memory_regions(gm, None);
         res.unwrap();
     }
 
@@ -761,7 +953,7 @@ pub(crate) mod tests {
 
             let region = GuestRegionMmap::new(region, GuestAddress(i as u64 * 0x1000)).unwrap();
 
-            let res = vm.register_memory_region(region);
+            let res = vm.register_memory_region(region, None);
 
             if max_nr_regions <= i {
                 assert!(
@@ -787,7 +979,7 @@ pub(crate) mod tests {
         let vcpu_count = 2;
         let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128));
 
-        let (vcpu_vec, _) = vm.create_vcpus(vcpu_count).unwrap();
+        let (vcpu_vec, _) = vm.create_vcpus(vcpu_count, false).unwrap();
 
         assert_eq!(vcpu_vec.len(), vcpu_count as usize);
     }
diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs
index 4abbedc4530..92db7677cfc 100644
--- a/src/vmm/tests/integration_tests.rs
+++ b/src/vmm/tests/integration_tests.rs
@@ -36,11 +36,9 @@ use vmm_sys_util::tempfile::TempFile;
 
 #[allow(unused_mut, unused_variables)]
 fn check_booted_microvm(vmm: Arc<Mutex<Vmm>>, mut evmgr: EventManager) {
+    // TODO: fix this behaviour on x86_64.
     // On x86_64, the vmm should exit once its workload completes and signals the exit event.
     // On aarch64, the test kernel doesn't exit, so the vmm is force-stopped.
-    #[cfg(target_arch = "x86_64")]
-    evmgr.run_with_timeout(500).unwrap();
-    #[cfg(target_arch = "aarch64")]
     vmm.lock().unwrap().stop(FcExitCode::Ok);
 
     assert_eq!(
@@ -81,12 +79,10 @@ fn check_build_microvm(vmm: Arc<Mutex<Vmm>>, mut evmgr: EventManager) {
     assert_eq!(vmm.lock().unwrap().instance_info().state, VmState::Paused);
 
     // The microVM should be able to resume and exit successfully.
+    // TODO: fix this behaviour on x86_64.
     // On x86_64, the vmm should exit once its workload completes and signals the exit event.
     // On aarch64, the test kernel doesn't exit, so the vmm is force-stopped.
     vmm.lock().unwrap().resume_vm().unwrap();
-    #[cfg(target_arch = "x86_64")]
-    evmgr.run_with_timeout(500).unwrap();
-    #[cfg(target_arch = "aarch64")]
     vmm.lock().unwrap().stop(FcExitCode::Ok);
     assert_eq!(
         vmm.lock().unwrap().shutdown_exit_code(),
diff --git a/tests/README.md b/tests/README.md
index e8ad62d0792..803b4e8ec62 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -340,6 +340,8 @@ which tests are run in which context:
   in separate pipelines according to various cron schedules.
 - Tests marked as `no_block_pr` are run in the "optional" PR CI pipeline. This
   pipeline is not required to pass for merging a PR.
+- Tests marked as `secret_hiding` are secret hiding specifc tests. They don't
+  run by default.
 
 All tests without markers are run for every pull request, and are required to
 pass for the PR to be merged.
diff --git a/tests/conftest.py b/tests/conftest.py
index 7a6423e9d6f..50e0c241f19 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -139,7 +139,7 @@ def pytest_runtest_logreport(report):
             "test": report.nodeid,
             "instance": global_props.instance,
             "cpu_model": global_props.cpu_model,
-            "host_kernel": "linux-" + global_props.host_linux_version,
+            "host_kernel": "linux-" + global_props.host_linux_version_metrics,
             "phase": report.when,
         },
         # per test
@@ -147,7 +147,7 @@ def pytest_runtest_logreport(report):
             "test": report.nodeid,
             "instance": global_props.instance,
             "cpu_model": global_props.cpu_model,
-            "host_kernel": "linux-" + global_props.host_linux_version,
+            "host_kernel": "linux-" + global_props.host_linux_version_metrics,
         },
         # per coarse-grained test name, dropping parameters and other dimensions to reduce metric count for dashboard
         # Note: noideid is formatted as below
@@ -159,7 +159,7 @@ def pytest_runtest_logreport(report):
         # per phase
         {"phase": report.when},
         # per host kernel
-        {"host_kernel": "linux-" + global_props.host_linux_version},
+        {"host_kernel": "linux-" + global_props.host_linux_version_metrics},
         # per CPU
         {"cpu_model": global_props.cpu_model},
         # and global
@@ -442,6 +442,20 @@ def snapshot_type(request):
     return request.param
 
 
+secret_free_test_cases = [False]
+if (
+    global_props.host_linux_version_metrics == "next"
+    and global_props.instance != "m6g.metal"
+):
+    secret_free_test_cases.append(True)
+
+
+@pytest.fixture(params=secret_free_test_cases)
+def secret_free(request):
+    """Supported secret hiding configuration, based on hardware"""
+    return request.param
+
+
 @pytest.fixture
 def results_dir(request, pytestconfig):
     """
@@ -620,6 +634,7 @@ def uvm_restored(
     uvm = uvm_booted(
         microvm_factory, guest_kernel, rootfs, cpu_template, pci_enabled, **kwargs
     )
+    uvm.memory_monitor = None
     snapshot = uvm.snapshot_full()
     uvm.kill()
     uvm2 = microvm_factory.build_from_snapshot(snapshot)
diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py
index 03f90905843..16fed6e2b39 100644
--- a/tests/framework/microvm.py
+++ b/tests/framework/microvm.py
@@ -270,6 +270,7 @@ def __init__(
         self.disks_vhost_user = {}
         self.vcpus_count = None
         self.mem_size_bytes = None
+        self.secret_free = False
         self.cpu_template_name = "None"
         # The given custom CPU template will be set in basic_config() but could
         # be overwritten via set_cpu_template().
@@ -494,12 +495,13 @@ def dimensions(self):
         return {
             "instance": global_props.instance,
             "cpu_model": global_props.cpu_model,
-            "host_kernel": f"linux-{global_props.host_linux_version}",
+            "host_kernel": f"linux-{global_props.host_linux_version_metrics}",
             "guest_kernel": self.kernel_file.stem[2:],
             "rootfs": self.rootfs_file.name,
             "vcpus": str(self.vcpus_count),
             "guest_memory": f"{self.mem_size_bytes / (1024 * 1024)}MB",
             "pci": f"{self.pci_enabled}",
+            "secret_free": str(self.secret_free or False),
         }
 
     @property
@@ -795,6 +797,7 @@ def basic_config(
         rootfs_io_engine=None,
         cpu_template: Optional[str] = None,
         enable_entropy_device=False,
+        secret_free=None,
     ):
         """Shortcut for quickly configuring a microVM.
 
@@ -812,15 +815,23 @@ def basic_config(
         which differs from Firecracker's default only in the enabling of the serial console.
         Reference: file:../../src/vmm/src/vmm_config/boot_source.rs::DEFAULT_KERNEL_CMDLINE
         """
+        # Have to do it this way as otherwise A/B-tests fail if the 'A' revision
+        # of Firecracker doesn't know about the secret_free parameter.
+        kwargs = {}
+        if secret_free:
+            kwargs["secret_free"] = True
+
         self.api.machine_config.put(
             vcpu_count=vcpu_count,
             smt=smt,
             mem_size_mib=mem_size_mib,
             track_dirty_pages=track_dirty_pages,
             huge_pages=huge_pages,
+            **kwargs,
         )
         self.vcpus_count = vcpu_count
         self.mem_size_bytes = mem_size_mib * 2**20
+        self.secret_free = secret_free or False
 
         if self.custom_cpu_template is not None:
             self.set_cpu_template(self.custom_cpu_template)
diff --git a/tests/framework/properties.py b/tests/framework/properties.py
index 0c430cfd41d..464e6cabad2 100644
--- a/tests/framework/properties.py
+++ b/tests/framework/properties.py
@@ -104,6 +104,13 @@ def host_linux_version_tpl(self):
         """Host Linux version major.minor, as a tuple for easy comparison"""
         return tuple(int(x) for x in self.host_linux_version.split("."))
 
+    @property
+    def host_linux_version_metrics(self):
+        """Host Linux version to be reported in metrics"""
+        return (
+            "next" if self.host_linux_version_tpl > (6, 12) else self.host_linux_version
+        )
+
     @property
     def is_ec2(self):
         """Are we running on an EC2 instance?"""
diff --git a/tests/framework/vm_config.json b/tests/framework/vm_config.json
index 6948002e245..188734ab0d6 100644
--- a/tests/framework/vm_config.json
+++ b/tests/framework/vm_config.json
@@ -20,6 +20,7 @@
   "machine-config": {
     "vcpu_count": 2,
     "mem_size_mib": 1024,
+    "secret_free": false,
     "smt": false,
     "track_dirty_pages": false,
     "huge_pages": "None"
diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py
index e2a1862c21f..aa04b2b5b65 100644
--- a/tests/host_tools/fcmetrics.py
+++ b/tests/host_tools/fcmetrics.py
@@ -513,7 +513,7 @@ def __init__(self, vm, timer=60):
         self.metrics_logger.set_dimensions(
             {
                 "instance": global_props.instance,
-                "host_kernel": "linux-" + global_props.host_linux_version,
+                "host_kernel": "linux-" + global_props.host_linux_version_metrics,
                 "guest_kernel": vm.kernel_file.stem[2:],
             }
         )
diff --git a/tests/host_tools/memory.py b/tests/host_tools/memory.py
index 134147724cd..1bc4cd26bf3 100644
--- a/tests/host_tools/memory.py
+++ b/tests/host_tools/memory.py
@@ -170,7 +170,5 @@ def __enter__(self):
     def __exit__(self, _type, _value, _traceback):
         """Exit context"""
 
-        if self.is_alive():
-            self.signal_stop()
-            self.join(timeout=1)
+        self.stop()
         self.check_samples()
diff --git a/tests/integration_tests/build/test_hiding_kernel.py b/tests/integration_tests/build/test_hiding_kernel.py
new file mode 100644
index 00000000000..1d76b31260f
--- /dev/null
+++ b/tests/integration_tests/build/test_hiding_kernel.py
@@ -0,0 +1,30 @@
+# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""A test which checks that the secret hiding enable kernel builds successfully."""
+
+import pytest
+
+from framework import utils
+
+
+@pytest.mark.timeout(600)
+@pytest.mark.secret_hiding
+def test_build_hiding_kernel():
+    """
+    In the test we will run our kernel build script to check it succeeds and builds the hidden kernel
+    """
+
+    # We have some extra deps for building the kernel that are not in the dev container
+    utils.check_output("apt update")
+    utils.check_output(
+        "apt install -y build-essential libncurses-dev bison flex libssl-dev libelf-dev bc dwarves libncurses5-dev kmod fakeroot"
+    )
+
+    # We have to configure git otherwise patch application fails
+    # the git log still credits the original author
+    utils.check_output('git config --global user.name "Firecracker CI"')
+    utils.check_output('git config --global user.email "ci@email.com"')
+
+    utils.check_output(
+        "cd ../resources/hiding_ci; ./build_and_install_kernel.sh --no-install --tidy"
+    )
diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py
index 32527e5c905..fd1cb0ef504 100644
--- a/tests/integration_tests/functional/test_api.py
+++ b/tests/integration_tests/functional/test_api.py
@@ -374,9 +374,7 @@ def test_api_machine_config(uvm_plain):
     bad_size = (1 << 64) - 1
     test_microvm.api.machine_config.patch(mem_size_mib=bad_size)
 
-    fail_msg = re.escape(
-        "Invalid Memory Configuration: Cannot create mmap region: Out of memory (os error 12)"
-    )
+    fail_msg = re.escape("Out of memory (os error 12)")
     with pytest.raises(RuntimeError, match=fail_msg):
         test_microvm.start()
 
@@ -749,6 +747,7 @@ def test_drive_patch(uvm_plain, io_engine):
 @pytest.mark.skipif(
     platform.machine() != "x86_64", reason="not yet implemented on aarch64"
 )
+@pytest.mark.skip(reason="TODO: fix graceful shutdown on x86_64")
 def test_send_ctrl_alt_del(uvm_plain_any):
     """
     Test shutting down the microVM gracefully on x86, by sending CTRL+ALT+DEL.
@@ -1056,6 +1055,7 @@ def test_get_full_config_after_restoring_snapshot(microvm_factory, uvm_nano):
     setup_cfg["machine-config"] = {
         "vcpu_count": 2,
         "mem_size_mib": 256,
+        "secret_free": False,
         "smt": True,
         "track_dirty_pages": False,
         "huge_pages": "None",
@@ -1172,6 +1172,7 @@ def test_get_full_config(uvm_plain):
     expected_cfg["machine-config"] = {
         "vcpu_count": 2,
         "mem_size_mib": 256,
+        "secret_free": False,
         "smt": False,
         "track_dirty_pages": False,
         "huge_pages": "None",
diff --git a/tests/integration_tests/functional/test_cmd_line_start.py b/tests/integration_tests/functional/test_cmd_line_start.py
index 3d45fa9d694..0fdcb1ebe1d 100644
--- a/tests/integration_tests/functional/test_cmd_line_start.py
+++ b/tests/integration_tests/functional/test_cmd_line_start.py
@@ -156,6 +156,7 @@ def test_config_start_no_api(uvm_plain, vm_config_file):
 
 
 @pytest.mark.parametrize("vm_config_file", ["framework/vm_config_network.json"])
+@pytest.mark.skip(reason="TODO: fix graceful shutdown on x86_64")
 def test_config_start_no_api_exit(uvm_plain, vm_config_file):
     """
     Test microvm exit when API server is disabled.
diff --git a/tests/integration_tests/functional/test_cpu_all.py b/tests/integration_tests/functional/test_cpu_all.py
index 6b934ffa394..e646c5fa0f6 100644
--- a/tests/integration_tests/functional/test_cpu_all.py
+++ b/tests/integration_tests/functional/test_cpu_all.py
@@ -18,6 +18,7 @@
 @pytest.mark.parametrize("vcpu_count", [MAX_VCPUS])
 def test_all_vcpus_online(uvm_any):
     """Check all vCPUs are online inside guest"""
+    uvm_any.memory_monitor = None
     assert (
         uvm_any.ssh.check_output("cat /sys/devices/system/cpu/online").stdout.strip()
         == f"0-{uvm_any.vcpus_count - 1}"
@@ -37,6 +38,7 @@ def test_all_vcpus_have_same_features(uvm_any):
     only test the equivalence of all CPUs in the same guest.
     """
     # Get a feature set for each CPU and deduplicate them.
+    uvm_any.memory_monitor = None
     unique_feature_lists = uvm_any.ssh.check_output(
         'grep -E "^(flags|Features)" /proc/cpuinfo | uniq'
     ).stdout.splitlines()
diff --git a/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py b/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py
index 012e1c7d3e7..090ba8e2c5f 100644
--- a/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py
+++ b/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py
@@ -15,6 +15,8 @@
 
 import os
 
+import pytest
+
 from framework import utils
 from framework.properties import global_props
 from framework.utils_cpuid import CPU_FEATURES_CMD, CpuModel
@@ -152,6 +154,10 @@
 }
 
 
+@pytest.mark.skipif(
+    global_props.host_linux_version_tpl > (6, 1),
+    reason="We don't currently track features for host kernels above 6.1.",
+)
 def test_host_vs_guest_cpu_features(uvm_plain_any):
     """Check CPU features host vs guest"""
 
diff --git a/tests/integration_tests/functional/test_secret_freedom.py b/tests/integration_tests/functional/test_secret_freedom.py
new file mode 100644
index 00000000000..fa83b2da0ab
--- /dev/null
+++ b/tests/integration_tests/functional/test_secret_freedom.py
@@ -0,0 +1,68 @@
+# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Test secret-freedom related functionality."""
+
+import pytest
+
+from framework import defs
+from framework.microvm import Serial
+from framework.properties import global_props
+from integration_tests.performance.test_initrd import INITRD_FILESYSTEM
+
+pytestmark = [
+    pytest.mark.skipif(
+        global_props.host_linux_version_metrics != "next",
+        reason="Secret Freedom is only supported on the in-dev upstream kernels for now",
+    ),
+    pytest.mark.skipif(
+        global_props.instance == "m6g.metal",
+        reason="Secret Freedom currently only works on ARM hardware conforming to at least ARMv8.4 as absense of ARM64_HAS_STAGE2_FWB causes kernel panics because of dcache flushing during stage2 page table entry installation",
+    ),
+]
+
+
+def test_secret_free_boot(microvm_factory, guest_kernel, rootfs):
+    """Tests that a VM can boot, e.g. some basic I/O works through userspace bounce buffers"""
+    vm = microvm_factory.build(guest_kernel, rootfs)
+    vm.spawn()
+    vm.memory_monitor = None
+    vm.basic_config(secret_free=True)
+    vm.add_net_iface()
+    vm.start()
+
+
+def test_secret_free_initrd(microvm_factory, guest_kernel):
+    """
+    Test that we can boot a secret hidden initrd (e.g. a VM with no I/O devices)
+    """
+    fs = defs.ARTIFACT_DIR / "initramfs.cpio"
+    uvm = microvm_factory.build(guest_kernel)
+    uvm.initrd_file = fs
+    uvm.help.enable_console()
+    uvm.spawn(serial_out_path=None)
+    uvm.memory_monitor = None
+
+    uvm.basic_config(
+        add_root_device=False,
+        vcpu_count=1,
+        use_initrd=True,
+        secret_free=True,
+    )
+
+    uvm.start()
+    serial = Serial(uvm)
+    serial.open()
+    serial.rx(token="# ")
+    serial.tx("mount |grep rootfs")
+    serial.rx(token=f"rootfs on / type {INITRD_FILESYSTEM}")
+
+
+def test_secret_free_snapshot_creation(microvm_factory, guest_kernel, rootfs):
+    """Test that snapshot creation works for secret hidden VMs"""
+    vm = microvm_factory.build(guest_kernel, rootfs)
+    vm.spawn()
+    vm.memory_monitor = None
+    vm.basic_config(secret_free=True)
+    vm.add_net_iface()
+    vm.start()
+    vm.snapshot_full()
diff --git a/tests/integration_tests/functional/test_shut_down.py b/tests/integration_tests/functional/test_shut_down.py
index 4b21aa3d2d5..a9c6fb12bbd 100644
--- a/tests/integration_tests/functional/test_shut_down.py
+++ b/tests/integration_tests/functional/test_shut_down.py
@@ -4,11 +4,18 @@
 
 import platform
 
+import pytest
 from packaging import version
 
 from framework import utils
+from framework.properties import global_props
 
 
+@pytest.mark.skipif(
+    global_props.host_linux_version_tpl > (6, 1),
+    reason="The number of threads associated to firecracker changes in newer kernels",
+)
+@pytest.mark.skip(reason="TODO: fix graceful shutdown on x86_64")
 def test_reboot(uvm_plain_any):
     """
     Test reboot from guest.
diff --git a/tests/integration_tests/functional/test_snapshot_basic.py b/tests/integration_tests/functional/test_snapshot_basic.py
index bd9f1ec0d9b..99343279cfd 100644
--- a/tests/integration_tests/functional/test_snapshot_basic.py
+++ b/tests/integration_tests/functional/test_snapshot_basic.py
@@ -332,9 +332,9 @@ def test_negative_snapshot_permissions(uvm_plain_rw, microvm_factory):
     microvm.spawn()
 
     expected_err = re.escape(
-        "Load snapshot error: Failed to restore from snapshot: Failed to load guest "
-        "memory: Error creating guest memory from file: Failed to load guest memory: "
-        "Permission denied (os error 13)"
+        "Load snapshot error: Failed to restore from snapshot: Failed to build microVM "
+        "from snapshot: Failed to load guest memory: Error creating guest memory from file: "
+        "Failed to load guest memory: Permission denied (os error 13)"
     )
     with pytest.raises(RuntimeError, match=expected_err):
         microvm.restore_from_snapshot(snapshot, resume=True)
diff --git a/tests/integration_tests/functional/test_uffd.py b/tests/integration_tests/functional/test_uffd.py
index a67a24a4f6b..cb4121175c0 100644
--- a/tests/integration_tests/functional/test_uffd.py
+++ b/tests/integration_tests/functional/test_uffd.py
@@ -12,18 +12,20 @@
 
 
 @pytest.fixture(scope="function", name="snapshot")
-def snapshot_fxt(microvm_factory, guest_kernel_linux_5_10, rootfs):
+def snapshot_fxt(microvm_factory, guest_kernel_linux_5_10, rootfs, secret_free):
     """Create a snapshot of a microVM."""
 
     basevm = microvm_factory.build(guest_kernel_linux_5_10, rootfs)
     basevm.spawn()
-    basevm.basic_config(vcpu_count=2, mem_size_mib=256)
+    basevm.basic_config(vcpu_count=2, mem_size_mib=256, secret_free=secret_free)
     basevm.add_net_iface()
 
     # Add a memory balloon.
-    basevm.api.balloon.put(
-        amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=0
-    )
+    # Note: Secret Free VMs do not support ballooning as of now.
+    if not secret_free:
+        basevm.api.balloon.put(
+            amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=0
+        )
 
     basevm.start()
 
@@ -43,9 +45,9 @@ def test_bad_socket_path(uvm_plain, snapshot):
     jailed_vmstate = vm.create_jailed_resource(snapshot.vmstate)
 
     expected_msg = re.escape(
-        "Load snapshot error: Failed to restore from snapshot: Failed to load guest "
-        "memory: Error creating guest memory from uffd: Failed to connect to UDS Unix stream: No "
-        "such file or directory (os error 2)"
+        "Load snapshot error: Failed to restore from snapshot: Failed to build microVM from "
+        "snapshot: Failed to load guest memory: Error creating guest memory from uffd: Failed "
+        "to connect to UDS Unix stream: No such file or directory (os error 2)"
     )
     with pytest.raises(RuntimeError, match=expected_msg):
         vm.api.snapshot_load.put(
@@ -69,9 +71,9 @@ def test_unbinded_socket(uvm_plain, snapshot):
     jailed_sock_path = vm.create_jailed_resource(socket_path)
 
     expected_msg = re.escape(
-        "Load snapshot error: Failed to restore from snapshot: Failed to load guest "
-        "memory: Error creating guest memory from uffd: Failed to connect to UDS Unix stream: "
-        "Connection refused (os error 111)"
+        "Load snapshot error: Failed to restore from snapshot: Failed to build microVM "
+        "from snapshot: Failed to load guest memory: Error creating guest memory from uffd: "
+        "Failed to connect to UDS Unix stream: Connection refused (os error 111)"
     )
     with pytest.raises(RuntimeError, match=expected_msg):
         vm.api.snapshot_load.put(
@@ -82,6 +84,15 @@ def test_unbinded_socket(uvm_plain, snapshot):
     vm.mark_killed()
 
 
+def has_balloon_device(microvm):
+    """
+    Check if a balloon device is present in the Firecracker microVM.
+    """
+    response = microvm.api.vm_config.get()
+    config = response.json()
+    return config.get("balloon")
+
+
 def test_valid_handler(uvm_plain, snapshot):
     """
     Test valid uffd handler scenario.
@@ -91,14 +102,16 @@ def test_valid_handler(uvm_plain, snapshot):
     vm.spawn()
     vm.restore_from_snapshot(snapshot, resume=True, uffd_handler_name="on_demand")
 
-    # Inflate balloon.
-    vm.api.balloon.patch(amount_mib=200)
+    # Secret Free VMs do not support ballooning so the balloon device is not added to them.
+    if has_balloon_device(vm):
+        # Inflate balloon.
+        vm.api.balloon.patch(amount_mib=200)
 
-    # Verify if the restored guest works.
-    vm.ssh.check_output("true")
+        # Verify if the restored guest works.
+        vm.ssh.check_output("true")
 
-    # Deflate balloon.
-    vm.api.balloon.patch(amount_mib=0)
+        # Deflate balloon.
+        vm.api.balloon.patch(amount_mib=0)
 
     # Verify if the restored guest works.
     vm.ssh.check_output("true")
diff --git a/tests/integration_tests/performance/test_block.py b/tests/integration_tests/performance/test_block.py
index 8882ee0717c..fce39baab40 100644
--- a/tests/integration_tests/performance/test_block.py
+++ b/tests/integration_tests/performance/test_block.py
@@ -167,15 +167,22 @@ def test_block_performance(
     fio_block_size,
     fio_engine,
     io_engine,
+    secret_free,
     metrics,
     results_dir,
 ):
     """
     Execute block device emulation benchmarking scenarios.
     """
+    if secret_free and io_engine == "Async":
+        pytest.skip("userspace bounce buffers not supported with async block engine")
+
     vm = uvm_plain_acpi
+    vm.memory_monitor = None
     vm.spawn(log_level="Info", emit_metrics=True)
-    vm.basic_config(vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB)
+    vm.basic_config(
+        vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB, secret_free=secret_free
+    )
     vm.add_net_iface()
     # Add a secondary block device for benchmark tests.
     fs = drive_tools.FilesystemFile(
diff --git a/tests/integration_tests/performance/test_boottime.py b/tests/integration_tests/performance/test_boottime.py
index d80bf026a39..33327da9903 100644
--- a/tests/integration_tests/performance/test_boottime.py
+++ b/tests/integration_tests/performance/test_boottime.py
@@ -95,10 +95,18 @@ def to_ms(v, unit):
 
 
 def launch_vm_with_boot_timer(
-    microvm_factory, guest_kernel_acpi, rootfs_rw, vcpu_count, mem_size_mib, pci_enabled
+    microvm_factory,
+    guest_kernel_acpi,
+    rootfs_rw,
+    vcpu_count,
+    mem_size_mib,
+    pci_enabled,
+    secret_free,
 ):
     """Launches a microVM with guest-timer and returns the reported metrics for it"""
-    vm = microvm_factory.build(guest_kernel_acpi, rootfs_rw, pci=pci_enabled)
+    vm = microvm_factory.build(
+        guest_kernel_acpi, rootfs_rw, pci=pci_enabled, monitor_memory=False
+    )
     vm.jailer.extra_args.update({"boot-timer": None})
     vm.spawn()
     vm.basic_config(
@@ -106,6 +114,7 @@ def launch_vm_with_boot_timer(
         mem_size_mib=mem_size_mib,
         boot_args=DEFAULT_BOOT_ARGS + " init=/usr/local/bin/init",
         enable_entropy_device=True,
+        secret_free=secret_free,
     )
     vm.add_net_iface()
     vm.start()
@@ -119,7 +128,7 @@ def launch_vm_with_boot_timer(
 def test_boot_timer(microvm_factory, guest_kernel_acpi, rootfs, pci_enabled):
     """Tests that the boot timer device works"""
     launch_vm_with_boot_timer(
-        microvm_factory, guest_kernel_acpi, rootfs, 1, 128, pci_enabled
+        microvm_factory, guest_kernel_acpi, rootfs, 1, 128, pci_enabled, False
     )
 
 
@@ -135,6 +144,7 @@ def test_boottime(
     vcpu_count,
     mem_size_mib,
     pci_enabled,
+    secret_free,
     metrics,
 ):
     """Test boot time with different guest configurations"""
@@ -147,6 +157,7 @@ def test_boottime(
             vcpu_count,
             mem_size_mib,
             pci_enabled,
+            secret_free,
         )
 
         if i == 0:
diff --git a/tests/integration_tests/performance/test_huge_pages.py b/tests/integration_tests/performance/test_huge_pages.py
index 1c5a14873d1..83bfb971685 100644
--- a/tests/integration_tests/performance/test_huge_pages.py
+++ b/tests/integration_tests/performance/test_huge_pages.py
@@ -54,6 +54,11 @@ def check_hugetlbfs_in_use(pid: int, allocation_name: str):
     assert kernel_page_size_kib > 4
 
 
+@pytest.mark.skipif(
+    global_props.host_linux_version_tpl > (6, 1)
+    and global_props.cpu_architecture == "aarch64",
+    reason="Huge page tests with secret hidden kernels on ARM currently fail",
+)
 def test_hugetlbfs_boot(uvm_plain):
     """Tests booting a microvm with guest memory backed by 2MB hugetlbfs pages"""
 
@@ -102,6 +107,11 @@ def test_hugetlbfs_snapshot(microvm_factory, uvm_plain, snapshot_type):
     check_hugetlbfs_in_use(vm.firecracker_pid, "/anon_hugepage")
 
 
+@pytest.mark.skipif(
+    global_props.host_linux_version_tpl > (6, 1)
+    and global_props.cpu_architecture == "aarch64",
+    reason="Huge page tests with secret hidden kernels on ARM currently fail",
+)
 @pytest.mark.parametrize("huge_pages", HugePagesConfig)
 def test_ept_violation_count(
     microvm_factory,
@@ -177,6 +187,11 @@ def test_ept_violation_count(
     metrics.put_metric(metric, int(metric_value), "Count")
 
 
+@pytest.mark.skipif(
+    global_props.host_linux_version_tpl > (6, 1)
+    and global_props.cpu_architecture == "aarch64",
+    reason="Huge page tests with secret hidden kernels on ARM currently fail",
+)
 def test_negative_huge_pages_plus_balloon(uvm_plain):
     """Tests that huge pages and memory ballooning cannot be used together"""
     uvm_plain.memory_monitor = None
@@ -186,7 +201,7 @@ def test_negative_huge_pages_plus_balloon(uvm_plain):
     uvm_plain.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB)
     with pytest.raises(
         RuntimeError,
-        match="Firecracker's huge pages support is incompatible with memory ballooning.",
+        match="Memory ballooning is incompatible with huge pages.",
     ):
         uvm_plain.api.balloon.put(amount_mib=0, deflate_on_oom=False)
 
@@ -195,6 +210,6 @@ def test_negative_huge_pages_plus_balloon(uvm_plain):
     uvm_plain.api.balloon.put(amount_mib=0, deflate_on_oom=False)
     with pytest.raises(
         RuntimeError,
-        match="Machine config error: Firecracker's huge pages support is incompatible with memory ballooning.",
+        match="Machine config error: 'balloon device' and 'huge pages' are mutually exclusive and cannot be used together.",
     ):
         uvm_plain.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB)
diff --git a/tests/integration_tests/performance/test_initrd.py b/tests/integration_tests/performance/test_initrd.py
index 1bc84933fe9..0caae3b2d08 100644
--- a/tests/integration_tests/performance/test_initrd.py
+++ b/tests/integration_tests/performance/test_initrd.py
@@ -4,6 +4,7 @@
 import pytest
 
 from framework.microvm import HugePagesConfig, Serial
+from framework.properties import global_props
 
 INITRD_FILESYSTEM = "rootfs"
 
@@ -22,6 +23,11 @@ def uvm_with_initrd(
     yield uvm
 
 
+@pytest.mark.skipif(
+    global_props.host_linux_version_tpl > (6, 1)
+    and global_props.cpu_architecture == "aarch64",
+    reason="Huge page tests with secret hidden kernels on ARM currently fail",
+)
 @pytest.mark.parametrize("huge_pages", HugePagesConfig)
 def test_microvm_initrd_with_serial(uvm_with_initrd, huge_pages):
     """
diff --git a/tests/integration_tests/performance/test_network.py b/tests/integration_tests/performance/test_network.py
index 62e73e865ca..1e8fa336132 100644
--- a/tests/integration_tests/performance/test_network.py
+++ b/tests/integration_tests/performance/test_network.py
@@ -38,7 +38,7 @@ def consume_ping_output(ping_putput):
 
 
 @pytest.fixture
-def network_microvm(request, uvm_plain_acpi):
+def network_microvm(request, uvm_plain_acpi, secret_free):
     """Creates a microvm with the networking setup used by the performance tests in this file.
     This fixture receives its vcpu count via indirect parameterization"""
 
@@ -47,7 +47,9 @@ def network_microvm(request, uvm_plain_acpi):
 
     vm = uvm_plain_acpi
     vm.spawn(log_level="Info", emit_metrics=True)
-    vm.basic_config(vcpu_count=guest_vcpus, mem_size_mib=guest_mem_mib)
+    vm.basic_config(
+        vcpu_count=guest_vcpus, mem_size_mib=guest_mem_mib, secret_free=secret_free
+    )
     vm.add_net_iface()
     vm.start()
     vm.pin_threads(0)
diff --git a/tests/integration_tests/performance/test_snapshot.py b/tests/integration_tests/performance/test_snapshot.py
index b4e9afabb67..2b1f107d1c3 100644
--- a/tests/integration_tests/performance/test_snapshot.py
+++ b/tests/integration_tests/performance/test_snapshot.py
@@ -44,7 +44,9 @@ def id(self):
         """Computes a unique id for this test instance"""
         return "all_dev" if self.all_devices else f"{self.vcpus}vcpu_{self.mem}mb"
 
-    def boot_vm(self, microvm_factory, guest_kernel, rootfs, pci_enabled) -> Microvm:
+    def boot_vm(
+        self, microvm_factory, guest_kernel, rootfs, pci_enabled, secret_free
+    ) -> Microvm:
         """Creates the initial snapshot that will be loaded repeatedly to sample latencies"""
         vm = microvm_factory.build(
             guest_kernel,
@@ -59,6 +61,7 @@ def boot_vm(self, microvm_factory, guest_kernel, rootfs, pci_enabled) -> Microvm
             mem_size_mib=self.mem,
             rootfs_io_engine="Sync",
             huge_pages=self.huge_pages,
+            secret_free=secret_free,
         )
 
         for _ in range(self.nets):
@@ -107,7 +110,7 @@ def test_restore_latency(
     We only test a single guest kernel, as the guest kernel does not "participate" in snapshot restore.
     """
     vm = test_setup.boot_vm(
-        microvm_factory, guest_kernel_linux_5_10, rootfs, pci_enabled
+        microvm_factory, guest_kernel_linux_5_10, rootfs, pci_enabled, False
     )
 
     metrics.set_dimensions(
@@ -154,14 +157,21 @@ def test_post_restore_latency(
     metrics,
     uffd_handler,
     huge_pages,
+    secret_free,
 ):
     """Collects latency metric of post-restore memory accesses done inside the guest"""
     if huge_pages != HugePagesConfig.NONE and uffd_handler is None:
         pytest.skip("huge page snapshots can only be restored using uffd")
 
+    if secret_free and uffd_handler is None:
+        pytest.skip("Restoring from a file is not compatible with Secret Freedom")
+
+    if secret_free and huge_pages != HugePagesConfig.NONE:
+        pytest.skip("Huge pages are not supported with Secret Freedom yet")
+
     test_setup = SnapshotRestoreTest(mem=1024, vcpus=2, huge_pages=huge_pages)
     vm = test_setup.boot_vm(
-        microvm_factory, guest_kernel_linux_5_10, rootfs, pci_enabled
+        microvm_factory, guest_kernel_linux_5_10, rootfs, pci_enabled, secret_free
     )
 
     metrics.set_dimensions(
@@ -215,11 +225,15 @@ def test_population_latency(
     huge_pages,
     vcpus,
     mem,
+    secret_free,
 ):
     """Collects population latency metrics (e.g. how long it takes UFFD handler to fault in all memory)"""
+    if secret_free and huge_pages != HugePagesConfig.NONE:
+        pytest.skip("Huge pages are not supported with Secret Freedom yet")
+
     test_setup = SnapshotRestoreTest(mem=mem, vcpus=vcpus, huge_pages=huge_pages)
     vm = test_setup.boot_vm(
-        microvm_factory, guest_kernel_linux_5_10, rootfs, pci_enabled
+        microvm_factory, guest_kernel_linux_5_10, rootfs, pci_enabled, secret_free
     )
 
     metrics.set_dimensions(
@@ -267,15 +281,21 @@ def test_snapshot_create_latency(
     uvm_plain,
     metrics,
     snapshot_type,
+    secret_free,
 ):
     """Measure the latency of creating a Full snapshot"""
 
+    if secret_free and snapshot_type.needs_dirty_page_tracking:
+        pytest.skip("secret freedom and dirty pgae tracking are mutually exclusive")
+
     vm = uvm_plain
+    vm.memory_monitor = None
     vm.spawn()
     vm.basic_config(
         vcpu_count=2,
         mem_size_mib=512,
         track_dirty_pages=snapshot_type.needs_dirty_page_tracking,
+        secret_free=secret_free,
     )
     vm.start()
     vm.pin_threads(0)
diff --git a/tests/integration_tests/performance/test_vsock.py b/tests/integration_tests/performance/test_vsock.py
index fa4c3a5abb5..9b489a8c90a 100644
--- a/tests/integration_tests/performance/test_vsock.py
+++ b/tests/integration_tests/performance/test_vsock.py
@@ -81,6 +81,7 @@ def test_vsock_throughput(
     mode,
     metrics,
     results_dir,
+    secret_free,
 ):
     """
     Test vsock throughput for multiple vm configurations.
@@ -89,7 +90,9 @@ def test_vsock_throughput(
     mem_size_mib = 1024
     vm = uvm_plain_acpi
     vm.spawn(log_level="Info", emit_metrics=True)
-    vm.basic_config(vcpu_count=vcpus, mem_size_mib=mem_size_mib)
+    vm.basic_config(
+        vcpu_count=vcpus, mem_size_mib=mem_size_mib, secret_free=secret_free
+    )
     vm.add_net_iface()
     # Create a vsock device
     vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path="/" + VSOCK_UDS_PATH)
diff --git a/tests/pytest.ini b/tests/pytest.ini
index 5656c8eee4d..930c4891814 100644
--- a/tests/pytest.ini
+++ b/tests/pytest.ini
@@ -5,12 +5,13 @@ addopts =
     -vv
     --durations=10
     --showlocals
-    -m 'not nonci and not no_block_pr'
+    -m 'not nonci and not no_block_pr and not secret_hiding'
     --json-report --json-report-file=../test_results/test-report.json
 
 markers =
     no_block_pr: tests whose failure does not block PR merging.
     nonci: mark test as nonci.
+    secret_hiding: tests related to secret hiding.
 
 ; Overwrite the default norecursedirs, which includes 'build'.
 norecursedirs = .*
diff --git a/tools/devtool b/tools/devtool
index 5bac70d0310..71739df5589 100755
--- a/tools/devtool
+++ b/tools/devtool
@@ -743,12 +743,6 @@ cmd_test() {
 
     env |grep -P "^(AWS_EMF_|BUILDKITE|CODECOV_)" > env.list
     if [[ $performance_tweaks -eq 1 ]]; then
-      if [[ "$(uname --machine)" == "x86_64" ]]; then
-        say "Detected CI and performance tests, tuning CPU frequency scaling and idle states for reduced variability"
-
-        apply_performance_tweaks
-      fi
-
       # It seems that even if the tests using huge pages run sequentially on ag=1 agents, right-sizing the huge pages
       # pool to the total number of huge pages used across all tests results in spurious failures with pool depletion
       # anyway (something else on the host seems to be stealing our huge pages, and we cannot "ear mark" them for
@@ -799,10 +793,6 @@ cmd_test() {
 
     # undo performance tweaks (in case the instance gets recycled for a non-perf test)
     if [[ $performance_tweaks -eq 1 ]]; then
-      if [[ "$(uname --machine)" == "x86_64" ]]; then
-        unapply_performance_tweaks
-      fi
-
       echo $huge_pages_old |sudo tee /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages >/dev/null
     fi
 
diff --git a/tools/setup-ci-artifacts.sh b/tools/setup-ci-artifacts.sh
index 10fded08787..ec8e4c7d8fd 100755
--- a/tools/setup-ci-artifacts.sh
+++ b/tools/setup-ci-artifacts.sh
@@ -12,7 +12,7 @@ say "Setup CI artifacts"
 cd build/img/$(uname -m)
 
 say "Fix executable permissions"
-find "firecracker" -type f |xargs chmod -c 755
+find "firecracker" -type f |xargs chmod -c 755 || true
 
 say "Generate SSH key to connect from host"
 if [ ! -s id_rsa ]; then