diff --git a/COPYING-5.14.0-570.25.1.el9_6 b/COPYING-5.14.0-570.26.1.el9_6 similarity index 100% rename from COPYING-5.14.0-570.25.1.el9_6 rename to COPYING-5.14.0-570.26.1.el9_6 diff --git a/Makefile.rhelver b/Makefile.rhelver index 02ac9c022433c..f74ec79875955 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 6 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 570.25.1 +RHEL_RELEASE = 570.26.1 # # ZSTREAM diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ca1f39e496316..1906e414303ce 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -97,6 +97,7 @@ config ARM64 select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PAGE_TABLE_CHECK select ARCH_SUPPORTS_PER_VMA_LOCK + select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_RT select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 9e44e693fcd26..7fa291d3f90ab 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -354,6 +354,7 @@ static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages) /* * Select all bits except the pfn */ +#define pte_pgprot pte_pgprot static inline pgprot_t pte_pgprot(pte_t pte) { unsigned long pfn = pte_pfn(pte); @@ -527,6 +528,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP))); } +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +#define pmd_special(pte) (!!((pmd_val(pte) & PTE_SPECIAL))) +static inline pmd_t pmd_mkspecial(pmd_t pmd) +{ + return set_pmd_bit(pmd, __pgprot(PTE_SPECIAL)); +} +#endif + #define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd)) #define __phys_to_pmd_val(phys) __phys_to_pte_val(phys) #define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT) @@ -544,6 +553,27 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP +#define pud_special(pte) pte_special(pud_pte(pud)) +#define pud_mkspecial(pte) pte_pud(pte_mkspecial(pud_pte(pud))) +#endif + +#define pmd_pgprot pmd_pgprot +static inline pgprot_t pmd_pgprot(pmd_t pmd) +{ + unsigned long pfn = pmd_pfn(pmd); + + return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd)); +} + +#define pud_pgprot pud_pgprot +static inline pgprot_t pud_pgprot(pud_t pud) +{ + unsigned long pfn = pud_pfn(pud); + + return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud)); +} + static inline void __set_pte_at(struct mm_struct *mm, unsigned long __always_unused addr, pte_t *ptep, pte_t pte, unsigned int nr) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 5c12b43e746bd..4833c86ec4829 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -396,33 +396,35 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) #define __flush_tlb_range_op(op, start, pages, stride, \ asid, tlb_level, tlbi_user, lpa2) \ do { \ + typeof(start) __flush_start = start; \ + typeof(pages) __flush_pages = pages; \ int num = 0; \ int scale = 3; \ int shift = lpa2 ? 
16 : PAGE_SHIFT; \ unsigned long addr; \ \ - while (pages > 0) { \ + while (__flush_pages > 0) { \ if (!system_supports_tlb_range() || \ - pages == 1 || \ - (lpa2 && start != ALIGN(start, SZ_64K))) { \ - addr = __TLBI_VADDR(start, asid); \ + __flush_pages == 1 || \ + (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) { \ + addr = __TLBI_VADDR(__flush_start, asid); \ __tlbi_level(op, addr, tlb_level); \ if (tlbi_user) \ __tlbi_user_level(op, addr, tlb_level); \ - start += stride; \ - pages -= stride >> PAGE_SHIFT; \ + __flush_start += stride; \ + __flush_pages -= stride >> PAGE_SHIFT; \ continue; \ } \ \ - num = __TLBI_RANGE_NUM(pages, scale); \ + num = __TLBI_RANGE_NUM(__flush_pages, scale); \ if (num >= 0) { \ - addr = __TLBI_VADDR_RANGE(start >> shift, asid, \ + addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \ scale, num, tlb_level); \ __tlbi(r##op, addr); \ if (tlbi_user) \ __tlbi_user(r##op, addr); \ - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ - pages -= __TLBI_RANGE_PAGES(num, scale); \ + __flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ + __flush_pages -= __TLBI_RANGE_PAGES(num, scale);\ } \ scale--; \ } \ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 6543642f56e45..96f6376ad2bde 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -44,6 +44,7 @@ static inline unsigned long pte_pfn(pte_t pte) /* * Select all bits except the pfn */ +#define pte_pgprot pte_pgprot static inline pgprot_t pte_pgprot(pte_t pte) { unsigned long pte_flags; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2d2d224207bcc..61ec7143cddbb 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -941,6 +941,7 @@ static inline int pte_unused(pte_t pte) * young/old accounting is not supported, i.e _PAGE_PROTECT and _PAGE_INVALID * must not be set. 
*/ +#define pte_pgprot pte_pgprot static inline pgprot_t pte_pgprot(pte_t pte) { unsigned long pte_flags = pte_val(pte) & _PAGE_CHG_MASK; diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 5880893329310..84a8c8f517fb2 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -118,12 +118,11 @@ static inline int __memcpy_toio_inuser(void __iomem *dst, SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, const void __user *, user_buffer, size_t, length) { + struct follow_pfnmap_args args = { }; u8 local_buf[64]; void __iomem *io_addr; void *buf; struct vm_area_struct *vma; - pte_t *ptep; - spinlock_t *ptl; long ret; if (!zpci_is_enabled()) @@ -169,11 +168,13 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, if (!(vma->vm_flags & VM_WRITE)) goto out_unlock_mmap; - ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl); + args.address = mmio_addr; + args.vma = vma; + ret = follow_pfnmap_start(&args); if (ret) goto out_unlock_mmap; - io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) | + io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) @@ -181,7 +182,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, ret = zpci_memcpy_toio(io_addr, buf, length); out_unlock_pt: - pte_unmap_unlock(ptep, ptl); + follow_pfnmap_end(&args); out_unlock_mmap: mmap_read_unlock(current->mm); out_free: @@ -260,12 +261,11 @@ static inline int __memcpy_fromio_inuser(void __user *dst, SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, void __user *, user_buffer, size_t, length) { + struct follow_pfnmap_args args = { }; u8 local_buf[64]; void __iomem *io_addr; void *buf; struct vm_area_struct *vma; - pte_t *ptep; - spinlock_t *ptl; long ret; if (!zpci_is_enabled()) @@ -308,11 +308,13 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, if (!(vma->vm_flags & VM_WRITE)) goto out_unlock_mmap; - ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl); + args.vma = vma; + args.address = mmio_addr; + ret = follow_pfnmap_start(&args); if (ret) goto out_unlock_mmap; - io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) | + io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) { @@ -322,7 +324,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, ret = zpci_memcpy_fromio(buf, io_addr, length); out_unlock_pt: - pte_unmap_unlock(ptep, ptl); + follow_pfnmap_end(&args); out_unlock_mmap: mmap_read_unlock(current->mm); diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 896d9b786736d..da3a5f673ca59 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -782,6 +782,7 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) return __pmd(pte_val(pte)); } +#define pmd_pgprot pmd_pgprot static inline pgprot_t pmd_pgprot(pmd_t entry) { unsigned long val = pmd_val(entry); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a5a59118efe4b..5d4f050bd59f9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -28,6 +28,7 @@ config X86_64 select ARCH_HAS_GIGANTIC_PAGE select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_PER_VMA_LOCK + select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_RT select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 8149afec43a4e..c5bc120fade87 
100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -121,6 +121,34 @@ extern pmdval_t early_pmd_flags; #define arch_end_context_switch(prev) do {} while(0) #endif /* CONFIG_PARAVIRT_XXL */ +static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) +{ + pmdval_t v = native_pmd_val(pmd); + + return native_make_pmd(v | set); +} + +static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) +{ + pmdval_t v = native_pmd_val(pmd); + + return native_make_pmd(v & ~clear); +} + +static inline pud_t pud_set_flags(pud_t pud, pudval_t set) +{ + pudval_t v = native_pud_val(pud); + + return native_make_pud(v | set); +} + +static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) +{ + pudval_t v = native_pud_val(pud); + + return native_make_pud(v & ~clear); +} + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. @@ -310,6 +338,30 @@ static inline int pud_devmap(pud_t pud) } #endif +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +static inline bool pmd_special(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SPECIAL; +} + +static inline pmd_t pmd_mkspecial(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_SPECIAL); +} +#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */ + +#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP +static inline bool pud_special(pud_t pud) +{ + return pud_flags(pud) & _PAGE_SPECIAL; +} + +static inline pud_t pud_mkspecial(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_SPECIAL); +} +#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ + static inline int pgd_devmap(pgd_t pgd) { return 0; @@ -480,20 +532,6 @@ static inline pte_t pte_mkdevmap(pte_t pte) return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP); } -static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) -{ - pmdval_t v = native_pmd_val(pmd); - - return native_make_pmd(v | set); -} - -static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) -{ - pmdval_t v = native_pmd_val(pmd); - - return native_make_pmd(v & ~clear); -} - /* See comments above mksaveddirty_shift() */ static inline pmd_t pmd_mksaveddirty(pmd_t pmd) { @@ -588,20 +626,6 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); #define pmd_mkwrite pmd_mkwrite -static inline pud_t pud_set_flags(pud_t pud, pudval_t set) -{ - pudval_t v = native_pud_val(pud); - - return native_make_pud(v | set); -} - -static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) -{ - pudval_t v = native_pud_val(pud); - - return native_make_pud(v & ~clear); -} - /* See comments above mksaveddirty_shift() */ static inline pud_t pud_mksaveddirty(pud_t pud) { diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index c0d56c02b8da9..9e84bcedd9adf 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -834,7 +834,7 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz return ret; } - for_each_node(nid) { + for_each_node_with_cpus(nid) { cpu = cpumask_first(cpumask_of_node(nid)); c = &cpu_data(cpu); diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 36b603d0cddef..fd210b362a04d 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -947,6 +948,26 @@ static void free_pfn_range(u64 paddr, unsigned long size) memtype_free(paddr, paddr + size); } +static int follow_phys(struct vm_area_struct *vma, unsigned long *prot, + resource_size_t *phys) +{ + struct follow_pfnmap_args args 
= { .vma = vma, .address = vma->vm_start }; + + if (follow_pfnmap_start(&args)) + return -EINVAL; + + /* Never return PFNs of anon folios in COW mappings. */ + if (!args.special) { + follow_pfnmap_end(&args); + return -EINVAL; + } + + *prot = pgprot_val(args.pgprot); + *phys = (resource_size_t)args.pfn << PAGE_SHIFT; + follow_pfnmap_end(&args); + return 0; +} + static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, pgprot_t *pgprot) { @@ -964,7 +985,7 @@ static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, * detect the PFN. If we need the cachemode as well, we're out of luck * for now and have to fail fork(). */ - if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { + if (!follow_phys(vma, &prot, paddr)) { if (pgprot) *pgprot = __pgprot(prot); return 0; diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/09dfc8a5.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/09dfc8a5.failed new file mode 100644 index 0000000000000..7cb365f0647ea --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/09dfc8a5.failed @@ -0,0 +1,134 @@ +vfio/pci: Fallback huge faults for unaligned pfn + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Alex Williamson +commit 09dfc8a5f2ce897005a94bf66cca4f91e4e03700 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/09dfc8a5.failed + +The PFN must also be aligned to the fault order to insert a huge +pfnmap. Test the alignment and fallback when unaligned. + +Fixes: f9e54c3a2f5b ("vfio/pci: implement huge_fault support") +Link: https://bugzilla.kernel.org/show_bug.cgi?id=219619 + Reported-by: Athul Krishna + Reported-by: Precific + Reviewed-by: Peter Xu + Tested-by: Precific +Link: https://lore.kernel.org/r/20250102183416.1841878-1-alex.williamson@redhat.com + Cc: stable@vger.kernel.org + Signed-off-by: Alex Williamson +(cherry picked from commit 09dfc8a5f2ce897005a94bf66cca4f91e4e03700) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/vfio/pci/vfio_pci_core.c +diff --cc drivers/vfio/pci/vfio_pci_core.c +index ffda816e0119,1a4ed5a357d3..000000000000 +--- a/drivers/vfio/pci/vfio_pci_core.c ++++ b/drivers/vfio/pci/vfio_pci_core.c +@@@ -1770,49 -1658,59 +1770,87 @@@ static vm_fault_t vfio_pci_mmap_fault(s + { + struct vm_area_struct *vma = vmf->vma; + struct vfio_pci_core_device *vdev = vma->vm_private_data; +++<<<<<<< HEAD + + struct vfio_pci_mmap_vma *mmap_vma; + + vm_fault_t ret = VM_FAULT_NOPAGE; + + + + mutex_lock(&vdev->vma_lock); + + down_read(&vdev->memory_lock); + + + + /* + + * Memory region cannot be accessed if the low power feature is engaged + + * or memory access is disabled. 
+ + */ + + if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) { + + ret = VM_FAULT_SIGBUS; + + goto up_out; +++======= ++ unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff; ++ vm_fault_t ret = VM_FAULT_SIGBUS; ++ ++ pfn = vma_to_pfn(vma) + pgoff; ++ ++ if (order && (pfn & ((1 << order) - 1) || ++ vmf->address & ((PAGE_SIZE << order) - 1) || ++ vmf->address + (PAGE_SIZE << order) > vma->vm_end)) { ++ ret = VM_FAULT_FALLBACK; ++ goto out; ++ } ++ ++ down_read(&vdev->memory_lock); ++ ++ if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) ++ goto out_unlock; ++ ++ switch (order) { ++ case 0: ++ ret = vmf_insert_pfn(vma, vmf->address, pfn); ++ break; ++ #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP ++ case PMD_ORDER: ++ ret = vmf_insert_pfn_pmd(vmf, ++ __pfn_to_pfn_t(pfn, PFN_DEV), false); ++ break; ++ #endif ++ #ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP ++ case PUD_ORDER: ++ ret = vmf_insert_pfn_pud(vmf, ++ __pfn_to_pfn_t(pfn, PFN_DEV), false); ++ break; ++ #endif ++ default: ++ ret = VM_FAULT_FALLBACK; +++>>>>>>> 09dfc8a5f2ce (vfio/pci: Fallback huge faults for unaligned pfn) + } + + -out_unlock: + - up_read(&vdev->memory_lock); + -out: + - dev_dbg_ratelimited(&vdev->pdev->dev, + - "%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n", + - __func__, order, + - vma->vm_pgoff >> + - (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT), + - pgoff, (unsigned int)ret); + + /* + + * We populate the whole vma on fault, so we need to test whether + + * the vma has already been mapped, such as for concurrent faults + + * to the same vma. io_remap_pfn_range() will trigger a BUG_ON if + + * we ask it to fill the same range again. + + */ + + list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { + + if (mmap_vma->vma == vma) + + goto up_out; + + } + + - return ret; + -} + + if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + + vma->vm_end - vma->vm_start, + + vma->vm_page_prot)) { + + ret = VM_FAULT_SIGBUS; + + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + + goto up_out; + + } + + -static vm_fault_t vfio_pci_mmap_page_fault(struct vm_fault *vmf) + -{ + - return vfio_pci_mmap_huge_fault(vmf, 0); + + if (__vfio_pci_add_vma(vdev, vma)) { + + ret = VM_FAULT_OOM; + + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + + } + + + +up_out: + + up_read(&vdev->memory_lock); + + mutex_unlock(&vdev->vma_lock); + + return ret; + } + + static const struct vm_operations_struct vfio_pci_mmap_ops = { +* Unmerged path drivers/vfio/pci/vfio_pci_core.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/0fd06844.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/0fd06844.failed new file mode 100644 index 0000000000000..da674785e8369 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/0fd06844.failed @@ -0,0 +1,83 @@ +vfio/type1: Use mapping page mask for pfnmaps + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Alex Williamson +commit 0fd06844de5d063cb384384e06a11ec7141a35d5 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/0fd06844.failed + +vfio-pci supports huge_fault for PCI MMIO BARs and will insert pud and +pmd mappings for well aligned mappings. follow_pfnmap_start() walks the +page table and therefore knows the page mask of the level where the +address is found and returns this through follow_pfnmap_args.addr_mask. 
+Subsequent pfns from this address until the end of the mapping page are +necessarily consecutive. Use this information to retrieve a range of +pfnmap pfns in a single pass. + +With optimal mappings and alignment on systems with 1GB pud and 4KB +page size, this reduces iterations for DMA mapping PCI BARs by a +factor of 256K. In real world testing, the overhead of iterating +pfns for a VM DMA mapping a 32GB PCI BAR is reduced from ~1s to +sub-millisecond overhead. + + Reviewed-by: Peter Xu + Reviewed-by: Mitchell Augustin + Tested-by: Mitchell Augustin + Reviewed-by: Jason Gunthorpe +Link: https://lore.kernel.org/r/20250218222209.1382449-7-alex.williamson@redhat.com + Signed-off-by: Alex Williamson +(cherry picked from commit 0fd06844de5d063cb384384e06a11ec7141a35d5) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/vfio/vfio_iommu_type1.c +diff --cc drivers/vfio/vfio_iommu_type1.c +index 410214696525,0ac56072af9f..000000000000 +--- a/drivers/vfio/vfio_iommu_type1.c ++++ b/drivers/vfio/vfio_iommu_type1.c +@@@ -523,14 -520,12 +523,14 @@@ static void vfio_batch_fini(struct vfio + + static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, + unsigned long vaddr, unsigned long *pfn, +- bool write_fault) ++ unsigned long *addr_mask, bool write_fault) + { + - struct follow_pfnmap_args args = { .vma = vma, .address = vaddr }; + + pte_t *ptep; + + pte_t pte; + + spinlock_t *ptl; + int ret; + + - ret = follow_pfnmap_start(&args); + + ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); + if (ret) { + bool unlocked = false; + +@@@ -549,14 -544,14 +549,23 @@@ + return ret; + } + +++<<<<<<< HEAD + + pte = ptep_get(ptep); + + + + if (write_fault && !pte_write(pte)) + + ret = -EFAULT; + + else + + *pfn = pte_pfn(pte); +++======= ++ if (write_fault && !args.writable) { ++ ret = -EFAULT; ++ } else { ++ *pfn = args.pfn; ++ *addr_mask = args.addr_mask; ++ } +++>>>>>>> 0fd06844de5d (vfio/type1: Use mapping page mask for pfnmaps) + + - follow_pfnmap_end(&args); + + pte_unmap_unlock(ptep, ptl); + return ret; + } + +* Unmerged path drivers/vfio/vfio_iommu_type1.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/10d83d77.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/10d83d77.failed new file mode 100644 index 0000000000000..be51f1ee459b8 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/10d83d77.failed @@ -0,0 +1,278 @@ +mm/pagewalk: check pfnmap for folio_walk_start() + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Peter Xu +commit 10d83d7781a8a6ff02bafd172c1ab183b27f8d5a +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/10d83d77.failed + +Teach folio_walk_start() to recognize special pmd/pud mappings, and fail +them properly as it means there's no folio backing them. 
+ +[peterx@redhat.com: remove some stale comments, per David] + Link: https://lkml.kernel.org/r/20240829202237.2640288-1-peterx@redhat.com +Link: https://lkml.kernel.org/r/20240826204353.2228736-7-peterx@redhat.com + Signed-off-by: Peter Xu + Cc: David Hildenbrand + Cc: Alexander Gordeev + Cc: Alex Williamson + Cc: Aneesh Kumar K.V + Cc: Borislav Petkov + Cc: Catalin Marinas + Cc: Christian Borntraeger + Cc: Dave Hansen + Cc: Gavin Shan + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Ingo Molnar + Cc: Jason Gunthorpe + Cc: Matthew Wilcox + Cc: Niklas Schnelle + Cc: Paolo Bonzini + Cc: Ryan Roberts + Cc: Sean Christopherson + Cc: Sven Schnelle + Cc: Thomas Gleixner + Cc: Vasily Gorbik + Cc: Will Deacon + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit 10d83d7781a8a6ff02bafd172c1ab183b27f8d5a) + Signed-off-by: Jonathan Maple + +# Conflicts: +# mm/pagewalk.c +diff --cc mm/pagewalk.c +index b7d7e4fcfad7,461ea3bbd8d9..000000000000 +--- a/mm/pagewalk.c ++++ b/mm/pagewalk.c +@@@ -676,3 -656,203 +676,206 @@@ int walk_page_mapping(struct address_sp + + return err; + } +++<<<<<<< HEAD +++======= ++ ++ /** ++ * folio_walk_start - walk the page tables to a folio ++ * @fw: filled with information on success. ++ * @vma: the VMA. ++ * @addr: the virtual address to use for the page table walk. ++ * @flags: flags modifying which folios to walk to. ++ * ++ * Walk the page tables using @addr in a given @vma to a mapped folio and ++ * return the folio, making sure that the page table entry referenced by ++ * @addr cannot change until folio_walk_end() was called. ++ * ++ * As default, this function returns only folios that are not special (e.g., not ++ * the zeropage) and never returns folios that are supposed to be ignored by the ++ * VM as documented by vm_normal_page(). If requested, zeropages will be ++ * returned as well. ++ * ++ * As default, this function only considers present page table entries. ++ * If requested, it will also consider migration entries. ++ * ++ * If this function returns NULL it might either indicate "there is nothing" or ++ * "there is nothing suitable". ++ * ++ * On success, @fw is filled and the function returns the folio while the PTL ++ * is still held and folio_walk_end() must be called to clean up, ++ * releasing any held locks. The returned folio must *not* be used after the ++ * call to folio_walk_end(), unless a short-term folio reference is taken before ++ * that call. ++ * ++ * @fw->page will correspond to the page that is effectively referenced by ++ * @addr. However, for migration entries and shared zeropages @fw->page is ++ * set to NULL. Note that large folios might be mapped by multiple page table ++ * entries, and this function will always only lookup a single entry as ++ * specified by @addr, which might or might not cover more than a single page of ++ * the returned folio. ++ * ++ * This function must *not* be used as a naive replacement for ++ * get_user_pages() / pin_user_pages(), especially not to perform DMA or ++ * to carelessly modify page content. This function may *only* be used to grab ++ * short-term folio references, never to grab long-term folio references. ++ * ++ * Using the page table entry pointers in @fw for reading or modifying the ++ * entry should be avoided where possible: however, there might be valid ++ * use cases. ++ * ++ * WARNING: Modifying page table entries in hugetlb VMAs requires a lot of care. ++ * For example, PMD page table sharing might require prior unsharing. 
Also, ++ * logical hugetlb entries might span multiple physical page table entries, ++ * which *must* be modified in a single operation (set_huge_pte_at(), ++ * huge_ptep_set_*, ...). Note that the page table entry stored in @fw might ++ * not correspond to the first physical entry of a logical hugetlb entry. ++ * ++ * The mmap lock must be held in read mode. ++ * ++ * Return: folio pointer on success, otherwise NULL. ++ */ ++ struct folio *folio_walk_start(struct folio_walk *fw, ++ struct vm_area_struct *vma, unsigned long addr, ++ folio_walk_flags_t flags) ++ { ++ unsigned long entry_size; ++ bool expose_page = true; ++ struct page *page; ++ pud_t *pudp, pud; ++ pmd_t *pmdp, pmd; ++ pte_t *ptep, pte; ++ spinlock_t *ptl; ++ pgd_t *pgdp; ++ p4d_t *p4dp; ++ ++ mmap_assert_locked(vma->vm_mm); ++ vma_pgtable_walk_begin(vma); ++ ++ if (WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end)) ++ goto not_found; ++ ++ pgdp = pgd_offset(vma->vm_mm, addr); ++ if (pgd_none_or_clear_bad(pgdp)) ++ goto not_found; ++ ++ p4dp = p4d_offset(pgdp, addr); ++ if (p4d_none_or_clear_bad(p4dp)) ++ goto not_found; ++ ++ pudp = pud_offset(p4dp, addr); ++ pud = pudp_get(pudp); ++ if (pud_none(pud)) ++ goto not_found; ++ if (IS_ENABLED(CONFIG_PGTABLE_HAS_HUGE_LEAVES) && pud_leaf(pud)) { ++ ptl = pud_lock(vma->vm_mm, pudp); ++ pud = pudp_get(pudp); ++ ++ entry_size = PUD_SIZE; ++ fw->level = FW_LEVEL_PUD; ++ fw->pudp = pudp; ++ fw->pud = pud; ++ ++ if (!pud_present(pud) || pud_devmap(pud) || pud_special(pud)) { ++ spin_unlock(ptl); ++ goto not_found; ++ } else if (!pud_leaf(pud)) { ++ spin_unlock(ptl); ++ goto pmd_table; ++ } ++ /* ++ * TODO: vm_normal_page_pud() will be handy once we want to ++ * support PUD mappings in VM_PFNMAP|VM_MIXEDMAP VMAs. ++ */ ++ page = pud_page(pud); ++ goto found; ++ } ++ ++ pmd_table: ++ VM_WARN_ON_ONCE(pud_leaf(*pudp)); ++ pmdp = pmd_offset(pudp, addr); ++ pmd = pmdp_get_lockless(pmdp); ++ if (pmd_none(pmd)) ++ goto not_found; ++ if (IS_ENABLED(CONFIG_PGTABLE_HAS_HUGE_LEAVES) && pmd_leaf(pmd)) { ++ ptl = pmd_lock(vma->vm_mm, pmdp); ++ pmd = pmdp_get(pmdp); ++ ++ entry_size = PMD_SIZE; ++ fw->level = FW_LEVEL_PMD; ++ fw->pmdp = pmdp; ++ fw->pmd = pmd; ++ ++ if (pmd_none(pmd)) { ++ spin_unlock(ptl); ++ goto not_found; ++ } else if (!pmd_leaf(pmd)) { ++ spin_unlock(ptl); ++ goto pte_table; ++ } else if (pmd_present(pmd)) { ++ page = vm_normal_page_pmd(vma, addr, pmd); ++ if (page) { ++ goto found; ++ } else if ((flags & FW_ZEROPAGE) && ++ is_huge_zero_pmd(pmd)) { ++ page = pfn_to_page(pmd_pfn(pmd)); ++ expose_page = false; ++ goto found; ++ } ++ } else if ((flags & FW_MIGRATION) && ++ is_pmd_migration_entry(pmd)) { ++ swp_entry_t entry = pmd_to_swp_entry(pmd); ++ ++ page = pfn_swap_entry_to_page(entry); ++ expose_page = false; ++ goto found; ++ } ++ spin_unlock(ptl); ++ goto not_found; ++ } ++ ++ pte_table: ++ VM_WARN_ON_ONCE(pmd_leaf(pmdp_get_lockless(pmdp))); ++ ptep = pte_offset_map_lock(vma->vm_mm, pmdp, addr, &ptl); ++ if (!ptep) ++ goto not_found; ++ pte = ptep_get(ptep); ++ ++ entry_size = PAGE_SIZE; ++ fw->level = FW_LEVEL_PTE; ++ fw->ptep = ptep; ++ fw->pte = pte; ++ ++ if (pte_present(pte)) { ++ page = vm_normal_page(vma, addr, pte); ++ if (page) ++ goto found; ++ if ((flags & FW_ZEROPAGE) && ++ is_zero_pfn(pte_pfn(pte))) { ++ page = pfn_to_page(pte_pfn(pte)); ++ expose_page = false; ++ goto found; ++ } ++ } else if (!pte_none(pte)) { ++ swp_entry_t entry = pte_to_swp_entry(pte); ++ ++ if ((flags & FW_MIGRATION) && ++ is_migration_entry(entry)) { ++ page = 
pfn_swap_entry_to_page(entry); ++ expose_page = false; ++ goto found; ++ } ++ } ++ pte_unmap_unlock(ptep, ptl); ++ not_found: ++ vma_pgtable_walk_end(vma); ++ return NULL; ++ found: ++ if (expose_page) ++ /* Note: Offset from the mapped page, not the folio start. */ ++ fw->page = nth_page(page, (addr & (entry_size - 1)) >> PAGE_SHIFT); ++ else ++ fw->page = NULL; ++ fw->ptl = ptl; ++ return page_folio(page); ++ } +++>>>>>>> 10d83d7781a8 (mm/pagewalk: check pfnmap for folio_walk_start()) +diff --git a/mm/memory.c b/mm/memory.c +index e2794e3b8919..e8a797dd7721 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -659,11 +659,10 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, + { + unsigned long pfn = pmd_pfn(pmd); + +- /* +- * There is no pmd_special() but there may be special pmds, e.g. +- * in a direct-access (dax) mapping, so let's just replicate the +- * !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here. +- */ ++ /* Currently it's only used for huge pfnmaps */ ++ if (unlikely(pmd_special(pmd))) ++ return NULL; ++ + if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { + if (vma->vm_flags & VM_MIXEDMAP) { + if (!pfn_valid(pfn)) +* Unmerged path mm/pagewalk.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/29ae7d96.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/29ae7d96.failed new file mode 100644 index 0000000000000..6ceae3c4b0909 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/29ae7d96.failed @@ -0,0 +1,287 @@ +mm: pass VMA instead of MM to follow_pte() + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author David Hildenbrand +commit 29ae7d96d166fa08c7232daf8a314ef5ba1efd20 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/29ae7d96.failed + +... and centralize the VM_IO/VM_PFNMAP sanity check in there. We'll +now also perform these sanity checks for direct follow_pte() +invocations. + +For generic_access_phys(), we might now check multiple times: nothing to +worry about, really. + +Link: https://lkml.kernel.org/r/20240410155527.474777-3-david@redhat.com + Signed-off-by: David Hildenbrand + Acked-by: Sean Christopherson [KVM] + Cc: Alex Williamson + Cc: Christoph Hellwig + Cc: Fei Li + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Ingo Molnar + Cc: Paolo Bonzini + Cc: Yonghua Huang + Signed-off-by: Andrew Morton +(cherry picked from commit 29ae7d96d166fa08c7232daf8a314ef5ba1efd20) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/x86/mm/pat/memtype.c +# drivers/virt/acrn/mm.c +diff --cc arch/x86/mm/pat/memtype.c +index 36b603d0cdde,bdc2a240c2aa..000000000000 +--- a/arch/x86/mm/pat/memtype.c ++++ b/arch/x86/mm/pat/memtype.c +@@@ -947,6 -948,29 +947,32 @@@ static void free_pfn_range(u64 paddr, u + memtype_free(paddr, paddr + size); + } + +++<<<<<<< HEAD +++======= ++ static int follow_phys(struct vm_area_struct *vma, unsigned long *prot, ++ resource_size_t *phys) ++ { ++ pte_t *ptep, pte; ++ spinlock_t *ptl; ++ ++ if (follow_pte(vma, vma->vm_start, &ptep, &ptl)) ++ return -EINVAL; ++ ++ pte = ptep_get(ptep); ++ ++ /* Never return PFNs of anon folios in COW mappings. 
*/ ++ if (vm_normal_folio(vma, vma->vm_start, pte)) { ++ pte_unmap_unlock(ptep, ptl); ++ return -EINVAL; ++ } ++ ++ *prot = pgprot_val(pte_pgprot(pte)); ++ *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; ++ pte_unmap_unlock(ptep, ptl); ++ return 0; ++ } ++ +++>>>>>>> 29ae7d96d166 (mm: pass VMA instead of MM to follow_pte()) + static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) + { +diff --cc drivers/virt/acrn/mm.c +index c4f2e15c8a2b,db8ff1d0ac23..000000000000 +--- a/drivers/virt/acrn/mm.c ++++ b/drivers/virt/acrn/mm.c +@@@ -168,7 -170,69 +168,73 @@@ int acrn_vm_ram_map(struct acrn_vm *vm + + /* Get the page number of the map region */ + nr_pages = memmap->len >> PAGE_SHIFT; +++<<<<<<< HEAD + + pages = vzalloc(nr_pages * sizeof(struct page *)); +++======= ++ if (!nr_pages) ++ return -EINVAL; ++ ++ mmap_read_lock(current->mm); ++ vma = vma_lookup(current->mm, memmap->vma_base); ++ if (vma && ((vma->vm_flags & VM_PFNMAP) != 0)) { ++ unsigned long start_pfn, cur_pfn; ++ spinlock_t *ptl; ++ bool writable; ++ pte_t *ptep; ++ ++ if ((memmap->vma_base + memmap->len) > vma->vm_end) { ++ mmap_read_unlock(current->mm); ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < nr_pages; i++) { ++ ret = follow_pte(vma, memmap->vma_base + i * PAGE_SIZE, ++ &ptep, &ptl); ++ if (ret) ++ break; ++ ++ cur_pfn = pte_pfn(ptep_get(ptep)); ++ if (i == 0) ++ start_pfn = cur_pfn; ++ writable = !!pte_write(ptep_get(ptep)); ++ pte_unmap_unlock(ptep, ptl); ++ ++ /* Disallow write access if the PTE is not writable. */ ++ if (!writable && ++ (memmap->attr & ACRN_MEM_ACCESS_WRITE)) { ++ ret = -EFAULT; ++ break; ++ } ++ ++ /* Disallow refcounted pages. */ ++ if (pfn_valid(cur_pfn) && ++ !PageReserved(pfn_to_page(cur_pfn))) { ++ ret = -EFAULT; ++ break; ++ } ++ ++ /* Disallow non-contiguous ranges. 
*/ ++ if (cur_pfn != start_pfn + i) { ++ ret = -EINVAL; ++ break; ++ } ++ } ++ mmap_read_unlock(current->mm); ++ ++ if (ret) { ++ dev_dbg(acrn_dev.this_device, ++ "Failed to lookup PFN at VMA:%pK.\n", (void *)memmap->vma_base); ++ return ret; ++ } ++ ++ return acrn_mm_region_add(vm, memmap->user_vm_pa, ++ PFN_PHYS(start_pfn), memmap->len, ++ ACRN_MEM_TYPE_WB, memmap->attr); ++ } ++ mmap_read_unlock(current->mm); ++ ++ pages = vzalloc(array_size(nr_pages, sizeof(*pages))); +++>>>>>>> 29ae7d96d166 (mm: pass VMA instead of MM to follow_pte()) + if (!pages) + return -ENOMEM; + +diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c +index 588089332931..bca6af2ee723 100644 +--- a/arch/s390/pci/pci_mmio.c ++++ b/arch/s390/pci/pci_mmio.c +@@ -169,7 +169,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, + if (!(vma->vm_flags & VM_WRITE)) + goto out_unlock_mmap; + +- ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl); ++ ret = follow_pte(vma, mmio_addr, &ptep, &ptl); + if (ret) + goto out_unlock_mmap; + +@@ -308,7 +308,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, + if (!(vma->vm_flags & VM_WRITE)) + goto out_unlock_mmap; + +- ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl); ++ ret = follow_pte(vma, mmio_addr, &ptep, &ptl); + if (ret) + goto out_unlock_mmap; + +* Unmerged path arch/x86/mm/pat/memtype.c +diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c +index 6c6586af7953..ec4d0003ba2f 100644 +--- a/drivers/vfio/vfio_iommu_type1.c ++++ b/drivers/vfio/vfio_iommu_type1.c +@@ -520,7 +520,7 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, + spinlock_t *ptl; + int ret; + +- ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); ++ ret = follow_pte(vma, vaddr, &ptep, &ptl); + if (ret) { + bool unlocked = false; + +@@ -534,7 +534,7 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, + if (ret) + return ret; + +- ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); ++ ret = follow_pte(vma, vaddr, &ptep, &ptl); + if (ret) + return ret; + } +* Unmerged path drivers/virt/acrn/mm.c +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 196c481ec160..b85fd05660e5 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -2427,7 +2427,7 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, + unsigned long end, unsigned long floor, unsigned long ceiling); + int + copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); +-int follow_pte(struct mm_struct *mm, unsigned long address, ++int follow_pte(struct vm_area_struct *vma, unsigned long address, + pte_t **ptepp, spinlock_t **ptlp); + int follow_pfn(struct vm_area_struct *vma, unsigned long address, + unsigned long *pfn); +diff --git a/mm/memory.c b/mm/memory.c +index e2794e3b8919..6706b9830402 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -5609,7 +5609,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) + + /** + * follow_pte - look up PTE at a user virtual address +- * @mm: the mm_struct of the target address space ++ * @vma: the memory mapping + * @address: user virtual address + * @ptepp: location to store found PTE + * @ptlp: location to store the lock for the PTE +@@ -5628,15 +5628,19 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) + * + * Return: zero on success, -ve otherwise. 
+ */ +-int follow_pte(struct mm_struct *mm, unsigned long address, ++int follow_pte(struct vm_area_struct *vma, unsigned long address, + pte_t **ptepp, spinlock_t **ptlp) + { ++ struct mm_struct *mm = vma->vm_mm; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep; + ++ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) ++ goto out; ++ + pgd = pgd_offset(mm, address); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto out; +@@ -5754,11 +5758,8 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, + int offset = offset_in_page(addr); + int ret = -EINVAL; + +- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) +- return -EINVAL; +- + retry: +- if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) ++ if (follow_pte(vma, addr, &ptep, &ptl)) + return -EINVAL; + pte = ptep_get(ptep); + pte_unmap_unlock(ptep, ptl); +@@ -5773,7 +5774,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, + if (!maddr) + return -ENOMEM; + +- if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) ++ if (follow_pte(vma, addr, &ptep, &ptl)) + goto out_unmap; + + if (!pte_same(pte, ptep_get(ptep))) { +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index b163a079fe65..acd8c5aee080 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -2884,7 +2884,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, + spinlock_t *ptl; + int r; + +- r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); ++ r = follow_pte(vma, addr, &ptep, &ptl); + if (r) { + /* + * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does +@@ -2899,7 +2899,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, + if (r) + return r; + +- r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); ++ r = follow_pte(vma, addr, &ptep, &ptl); + if (r) + return r; + } diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/3e509c9b.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/3e509c9b.failed new file mode 100644 index 0000000000000..c30be2e3188c4 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/3e509c9b.failed @@ -0,0 +1,118 @@ +mm/arm64: support large pfn mappings + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Peter Xu +commit 3e509c9b03f9abc7804c80bed266a6cc4286a5a8 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/3e509c9b.failed + +Support huge pfnmaps by using bit 56 (PTE_SPECIAL) for "special" on +pmds/puds. Provide the pmd/pud helpers to set/get special bit. + +There's one more thing missing for arm64 which is the pxx_pgprot() for +pmd/pud. Add them too, which is mostly the same as the pte version by +dropping the pfn field. These helpers are essential to be used in the new +follow_pfnmap*() API to report valid pgprot_t results. + +Note that arm64 doesn't yet support huge PUD yet, but it's still +straightforward to provide the pud helpers that we need altogether. Only +PMD helpers will make an immediate benefit until arm64 will support huge +PUDs first in general (e.g. in THPs). 
+ +Link: https://lkml.kernel.org/r/20240826204353.2228736-19-peterx@redhat.com + Signed-off-by: Peter Xu + Cc: Catalin Marinas + Cc: Will Deacon + Cc: Alexander Gordeev + Cc: Alex Williamson + Cc: Aneesh Kumar K.V + Cc: Borislav Petkov + Cc: Christian Borntraeger + Cc: Dave Hansen + Cc: David Hildenbrand + Cc: Gavin Shan + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Ingo Molnar + Cc: Jason Gunthorpe + Cc: Matthew Wilcox + Cc: Niklas Schnelle + Cc: Paolo Bonzini + Cc: Ryan Roberts + Cc: Sean Christopherson + Cc: Sven Schnelle + Cc: Thomas Gleixner + Cc: Vasily Gorbik + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit 3e509c9b03f9abc7804c80bed266a6cc4286a5a8) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/arm64/Kconfig +diff --cc arch/arm64/Kconfig +index ca1f39e49631,6607ed8fdbb4..000000000000 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@@ -97,7 -99,7 +97,11 @@@ config ARM6 + select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_PAGE_TABLE_CHECK + select ARCH_SUPPORTS_PER_VMA_LOCK +++<<<<<<< HEAD + + select ARCH_SUPPORTS_RT +++======= ++ select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE +++>>>>>>> 3e509c9b03f9 (mm/arm64: support large pfn mappings) + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT + select ARCH_WANT_DEFAULT_BPF_JIT +* Unmerged path arch/arm64/Kconfig +diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h +index e3ea0ef9673d..7fa291d3f90a 100644 +--- a/arch/arm64/include/asm/pgtable.h ++++ b/arch/arm64/include/asm/pgtable.h +@@ -528,6 +528,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) + return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP))); + } + ++#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP ++#define pmd_special(pte) (!!((pmd_val(pte) & PTE_SPECIAL))) ++static inline pmd_t pmd_mkspecial(pmd_t pmd) ++{ ++ return set_pmd_bit(pmd, __pgprot(PTE_SPECIAL)); ++} ++#endif ++ + #define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd)) + #define __phys_to_pmd_val(phys) __phys_to_pte_val(phys) + #define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT) +@@ -545,6 +553,27 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) + #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) + #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) + ++#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP ++#define pud_special(pte) pte_special(pud_pte(pud)) ++#define pud_mkspecial(pte) pte_pud(pte_mkspecial(pud_pte(pud))) ++#endif ++ ++#define pmd_pgprot pmd_pgprot ++static inline pgprot_t pmd_pgprot(pmd_t pmd) ++{ ++ unsigned long pfn = pmd_pfn(pmd); ++ ++ return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd)); ++} ++ ++#define pud_pgprot pud_pgprot ++static inline pgprot_t pud_pgprot(pud_t pud) ++{ ++ unsigned long pfn = pud_pfn(pud); ++ ++ return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud)); ++} ++ + static inline void __set_pte_at(struct mm_struct *mm, + unsigned long __always_unused addr, + pte_t *ptep, pte_t pte, unsigned int nr) diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/5731aacd.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/5731aacd.failed new file mode 100644 index 0000000000000..debf935c26f86 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/5731aacd.failed @@ -0,0 +1,80 @@ +KVM: use follow_pfnmap API + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Peter Xu +commit 
5731aacd54a883dd2c1a5e8c85e1fe78fc728dc7 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/5731aacd.failed + +Use the new pfnmap API to allow huge MMIO mappings for VMs. The rest work +is done perfectly on the other side (host_pfn_mapping_level()). + +Link: https://lkml.kernel.org/r/20240826204353.2228736-11-peterx@redhat.com + Signed-off-by: Peter Xu + Cc: Paolo Bonzini + Cc: Sean Christopherson + Cc: Alexander Gordeev + Cc: Alex Williamson + Cc: Aneesh Kumar K.V + Cc: Borislav Petkov + Cc: Catalin Marinas + Cc: Christian Borntraeger + Cc: Dave Hansen + Cc: David Hildenbrand + Cc: Gavin Shan + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Ingo Molnar + Cc: Jason Gunthorpe + Cc: Matthew Wilcox + Cc: Niklas Schnelle + Cc: Ryan Roberts + Cc: Sven Schnelle + Cc: Thomas Gleixner + Cc: Vasily Gorbik + Cc: Will Deacon + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit 5731aacd54a883dd2c1a5e8c85e1fe78fc728dc7) + Signed-off-by: Jonathan Maple + +# Conflicts: +# virt/kvm/kvm_main.c +diff --cc virt/kvm/kvm_main.c +index b163a079fe65,f416d5e3f9c0..000000000000 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@@ -2878,13 -2860,11 +2878,15 @@@ static int hva_to_pfn_remapped(struct v + unsigned long addr, bool write_fault, + bool *writable, kvm_pfn_t *p_pfn) + { ++ struct follow_pfnmap_args args = { .vma = vma, .address = addr }; + kvm_pfn_t pfn; +- pte_t *ptep; +- pte_t pte; +- spinlock_t *ptl; + int r; + +++<<<<<<< HEAD + + r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); +++======= ++ r = follow_pfnmap_start(&args); +++>>>>>>> 5731aacd54a8 (KVM: use follow_pfnmap API) + if (r) { + /* + * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does +@@@ -2899,7 -2879,7 +2901,11 @@@ + if (r) + return r; + +++<<<<<<< HEAD + + r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); +++======= ++ r = follow_pfnmap_start(&args); +++>>>>>>> 5731aacd54a8 (KVM: use follow_pfnmap API) + if (r) + return r; + } +* Unmerged path virt/kvm/kvm_main.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/5b34b76c.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/5b34b76c.failed new file mode 100644 index 0000000000000..8f30775e13fed --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/5b34b76c.failed @@ -0,0 +1,148 @@ +mm: move follow_phys to arch/x86/mm/pat/memtype.c + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Christoph Hellwig +commit 5b34b76cb0cd8a21dee5c7677eae98480b0d05cc +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/5b34b76c.failed + +follow_phys is only used by two callers in arch/x86/mm/pat/memtype.c. +Move it there and hardcode the two arguments that get the same values +passed by both callers. 
+ +[david@redhat.com: conflict resolutions] +Link: https://lkml.kernel.org/r/20240403212131.929421-4-david@redhat.com +Link: https://lkml.kernel.org/r/20240324234542.2038726-4-hch@lst.de + Signed-off-by: Christoph Hellwig + Signed-off-by: David Hildenbrand + Reviewed-by: David Hildenbrand + Cc: Andy Lutomirski + Cc: Dave Hansen + Cc: Fei Li + Cc: Ingo Molnar + Cc: Peter Zijlstra + Cc: Nathan Chancellor + Signed-off-by: Andrew Morton +(cherry picked from commit 5b34b76cb0cd8a21dee5c7677eae98480b0d05cc) + Signed-off-by: Jonathan Maple + +# Conflicts: +# include/linux/mm.h +diff --cc include/linux/mm.h +index 196c481ec160,5dc65618e386..000000000000 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@@ -2429,10 -2424,6 +2429,13 @@@ in + copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); + int follow_pte(struct mm_struct *mm, unsigned long address, + pte_t **ptepp, spinlock_t **ptlp); +++<<<<<<< HEAD + +int follow_pfn(struct vm_area_struct *vma, unsigned long address, + + unsigned long *pfn); + +int follow_phys(struct vm_area_struct *vma, unsigned long address, + + unsigned int flags, unsigned long *prot, resource_size_t *phys); +++======= +++>>>>>>> 5b34b76cb0cd (mm: move follow_phys to arch/x86/mm/pat/memtype.c) + int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write); + +diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c +index 36b603d0cdde..d01c3b0bd6eb 100644 +--- a/arch/x86/mm/pat/memtype.c ++++ b/arch/x86/mm/pat/memtype.c +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -947,6 +948,32 @@ static void free_pfn_range(u64 paddr, unsigned long size) + memtype_free(paddr, paddr + size); + } + ++static int follow_phys(struct vm_area_struct *vma, unsigned long *prot, ++ resource_size_t *phys) ++{ ++ pte_t *ptep, pte; ++ spinlock_t *ptl; ++ ++ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) ++ return -EINVAL; ++ ++ if (follow_pte(vma->vm_mm, vma->vm_start, &ptep, &ptl)) ++ return -EINVAL; ++ ++ pte = ptep_get(ptep); ++ ++ /* Never return PFNs of anon folios in COW mappings. */ ++ if (vm_normal_folio(vma, vma->vm_start, pte)) { ++ pte_unmap_unlock(ptep, ptl); ++ return -EINVAL; ++ } ++ ++ *prot = pgprot_val(pte_pgprot(pte)); ++ *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; ++ pte_unmap_unlock(ptep, ptl); ++ return 0; ++} ++ + static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) + { +@@ -964,7 +991,7 @@ static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + * detect the PFN. If we need the cachemode as well, we're out of luck + * for now and have to fail fork(). 
+ */ +- if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { ++ if (!follow_phys(vma, &prot, paddr)) { + if (pgprot) + *pgprot = __pgprot(prot); + return 0; +* Unmerged path include/linux/mm.h +diff --git a/mm/memory.c b/mm/memory.c +index e2794e3b8919..257618e95c0e 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -5699,38 +5699,6 @@ int follow_pfn(struct vm_area_struct *vma, unsigned long address, + EXPORT_SYMBOL(follow_pfn); + + #ifdef CONFIG_HAVE_IOREMAP_PROT +-int follow_phys(struct vm_area_struct *vma, +- unsigned long address, unsigned int flags, +- unsigned long *prot, resource_size_t *phys) +-{ +- int ret = -EINVAL; +- pte_t *ptep, pte; +- spinlock_t *ptl; +- +- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) +- goto out; +- +- if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) +- goto out; +- pte = ptep_get(ptep); +- +- /* Never return PFNs of anon folios in COW mappings. */ +- if (vm_normal_folio(vma, address, pte)) +- goto unlock; +- +- if ((flags & FOLL_WRITE) && !pte_write(pte)) +- goto unlock; +- +- *prot = pgprot_val(pte_pgprot(pte)); +- *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; +- +- ret = 0; +-unlock: +- pte_unmap_unlock(ptep, ptl); +-out: +- return ret; +-} +- + /** + * generic_access_phys - generic implementation for iomem mmap access + * @vma: the vma to access diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/62fb8adc.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/62fb8adc.failed new file mode 100644 index 0000000000000..f77fa7edb2880 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/62fb8adc.failed @@ -0,0 +1,268 @@ +mm: Provide address mask in struct follow_pfnmap_args + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Alex Williamson +commit 62fb8adc43afad5fa1c9cadc6f3a8e9fb72af194 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/62fb8adc.failed + +follow_pfnmap_start() walks the page table for a given address and +fills out the struct follow_pfnmap_args in pfnmap_args_setup(). +The address mask of the page table level is already provided to this +latter function for calculating the pfn. This address mask can also +be useful for the caller to determine the extent of the contiguous +mapping. + +For example, vfio-pci now supports huge_fault for pfnmaps and is able +to insert pud and pmd mappings. When we DMA map these pfnmaps, ex. +PCI MMIO BARs, we iterate follow_pfnmap_start() to get each pfn to test +for a contiguous pfn range. Providing the mapping address mask allows +us to skip the extent of the mapping level. Assuming a 1GB pud level +and 4KB page size, iterations are reduced by a factor of 256K. In wall +clock time, mapping a 32GB PCI BAR is reduced from ~1s to <1ms. 
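
As a minimal sketch only (not part of the patch), the loop below shows how a caller might use follow_pfnmap_start()/follow_pfnmap_end() together with the addr_mask output described above to step through a VM_PFNMAP range one leaf at a time instead of one page at a time. It assumes the mmap lock is already held for read, uses only the follow_pfnmap_args fields shown in this series, and count_pfnmap_chunks() is an illustrative name, not an existing function; error handling is reduced to the bare minimum.

#include <linux/mm.h>

/*
 * Count the number of leaf mappings (PTE/PMD/PUD extents) backing a
 * VM_PFNMAP range.  Illustrative sketch; caller holds mmap lock for read.
 */
static long count_pfnmap_chunks(struct vm_area_struct *vma,
				unsigned long vaddr, unsigned long end)
{
	long chunks = 0;

	while (vaddr < end) {
		struct follow_pfnmap_args args = {
			.vma = vma,
			.address = vaddr,
		};
		unsigned long mask;

		if (follow_pfnmap_start(&args))
			return -EFAULT;
		/* addr_mask covers the whole leaf: PAGE_MASK, PMD_MASK or PUD_MASK. */
		mask = args.addr_mask;
		follow_pfnmap_end(&args);

		/* Step past the entire PTE/PMD/PUD extent in one go. */
		vaddr = (vaddr & mask) + (~mask + 1);
		chunks++;
	}
	return chunks;
}

In a real pinning path the pfn would of course be consumed while the start()/end() critical section is still held; the sketch only illustrates how addr_mask collapses the per-page iteration that the commit message measures.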
+ + Cc: Andrew Morton + Cc: David Hildenbrand + Cc: linux-mm@kvack.org + Reviewed-by: Peter Xu + Reviewed-by: Mitchell Augustin + Tested-by: Mitchell Augustin + Reviewed-by: Jason Gunthorpe + Acked-by: David Hildenbrand +Link: https://lore.kernel.org/r/20250218222209.1382449-6-alex.williamson@redhat.com + Signed-off-by: Alex Williamson +(cherry picked from commit 62fb8adc43afad5fa1c9cadc6f3a8e9fb72af194) + Signed-off-by: Jonathan Maple + +# Conflicts: +# include/linux/mm.h +# mm/memory.c +diff --cc include/linux/mm.h +index 196c481ec160,92b30dba7e38..000000000000 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@@ -2436,6 -2398,39 +2436,42 @@@ int follow_phys(struct vm_area_struct * + int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write); + +++<<<<<<< HEAD +++======= ++ struct follow_pfnmap_args { ++ /** ++ * Inputs: ++ * @vma: Pointer to @vm_area_struct struct ++ * @address: the virtual address to walk ++ */ ++ struct vm_area_struct *vma; ++ unsigned long address; ++ /** ++ * Internals: ++ * ++ * The caller shouldn't touch any of these. ++ */ ++ spinlock_t *lock; ++ pte_t *ptep; ++ /** ++ * Outputs: ++ * ++ * @pfn: the PFN of the address ++ * @addr_mask: address mask covering pfn ++ * @pgprot: the pgprot_t of the mapping ++ * @writable: whether the mapping is writable ++ * @special: whether the mapping is a special mapping (real PFN maps) ++ */ ++ unsigned long pfn; ++ unsigned long addr_mask; ++ pgprot_t pgprot; ++ bool writable; ++ bool special; ++ }; ++ int follow_pfnmap_start(struct follow_pfnmap_args *args); ++ void follow_pfnmap_end(struct follow_pfnmap_args *args); ++ +++>>>>>>> 62fb8adc43af (mm: Provide address mask in struct follow_pfnmap_args) + extern void truncate_pagecache(struct inode *inode, loff_t new); + extern void truncate_setsize(struct inode *inode, loff_t newsize); + void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); +diff --cc mm/memory.c +index e2794e3b8919,68aa0f11633e..000000000000 +--- a/mm/memory.c ++++ b/mm/memory.c +@@@ -5607,60 -6479,137 +5607,92 @@@ int __pmd_alloc(struct mm_struct *mm, p + } + #endif /* __PAGETABLE_PMD_FOLDED */ + +++<<<<<<< HEAD +++======= ++ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args, ++ spinlock_t *lock, pte_t *ptep, ++ pgprot_t pgprot, unsigned long pfn_base, ++ unsigned long addr_mask, bool writable, ++ bool special) ++ { ++ args->lock = lock; ++ args->ptep = ptep; ++ args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT); ++ args->addr_mask = addr_mask; ++ args->pgprot = pgprot; ++ args->writable = writable; ++ args->special = special; ++ } ++ ++ static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma) ++ { ++ #ifdef CONFIG_LOCKDEP ++ struct file *file = vma->vm_file; ++ struct address_space *mapping = file ? file->f_mapping : NULL; ++ ++ if (mapping) ++ lockdep_assert(lockdep_is_held(&mapping->i_mmap_rwsem) || ++ lockdep_is_held(&vma->vm_mm->mmap_lock)); ++ else ++ lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock)); ++ #endif ++ } ++ +++>>>>>>> 62fb8adc43af (mm: Provide address mask in struct follow_pfnmap_args) + /** + - * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address + - * @args: Pointer to struct @follow_pfnmap_args + - * + - * The caller needs to setup args->vma and args->address to point to the + - * virtual address as the target of such lookup. On a successful return, + - * the results will be put into other output fields. 
+ + * follow_pte - look up PTE at a user virtual address + + * @mm: the mm_struct of the target address space + + * @address: user virtual address + + * @ptepp: location to store found PTE + + * @ptlp: location to store the lock for the PTE + * + - * After the caller finished using the fields, the caller must invoke + - * another follow_pfnmap_end() to proper releases the locks and resources + - * of such look up request. + - * + - * During the start() and end() calls, the results in @args will be valid + - * as proper locks will be held. After the end() is called, all the fields + - * in @follow_pfnmap_args will be invalid to be further accessed. Further + - * use of such information after end() may require proper synchronizations + - * by the caller with page table updates, otherwise it can create a + - * security bug. + - * + - * If the PTE maps a refcounted page, callers are responsible to protect + - * against invalidation with MMU notifiers; otherwise access to the PFN at + - * a later point in time can trigger use-after-free. + + * On a successful return, the pointer to the PTE is stored in @ptepp; + + * the corresponding lock is taken and its location is stored in @ptlp. + + * The contents of the PTE are only stable until @ptlp is released; + + * any further use, if any, must be protected against invalidation + + * with MMU notifiers. + * + * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore + - * should be taken for read, and the mmap semaphore cannot be released + - * before the end() is invoked. + + * should be taken for read. + * + - * This function must not be used to modify PTE content. + + * KVM uses this function. While it is arguably less bad than ``follow_pfn``, + + * it is not a good general-purpose API. + * + - * Return: zero on success, negative otherwise. + + * Return: zero on success, -ve otherwise. 
+ */ + -int follow_pfnmap_start(struct follow_pfnmap_args *args) + +int follow_pte(struct mm_struct *mm, unsigned long address, + + pte_t **ptepp, spinlock_t **ptlp) + { + - struct vm_area_struct *vma = args->vma; + - unsigned long address = args->address; + - struct mm_struct *mm = vma->vm_mm; + - spinlock_t *lock; + - pgd_t *pgdp; + - p4d_t *p4dp, p4d; + - pud_t *pudp, pud; + - pmd_t *pmdp, pmd; + - pte_t *ptep, pte; + - + - pfnmap_lockdep_assert(vma); + - + - if (unlikely(address < vma->vm_start || address >= vma->vm_end)) + - goto out; + + pgd_t *pgd; + + p4d_t *p4d; + + pud_t *pud; + + pmd_t *pmd; + + pte_t *ptep; + + - if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + - goto out; + -retry: + - pgdp = pgd_offset(mm, address); + - if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp))) + + pgd = pgd_offset(mm, address); + + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto out; + + - p4dp = p4d_offset(pgdp, address); + - p4d = READ_ONCE(*p4dp); + - if (p4d_none(p4d) || unlikely(p4d_bad(p4d))) + + p4d = p4d_offset(pgd, address); + + if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) + goto out; + + - pudp = pud_offset(p4dp, address); + - pud = READ_ONCE(*pudp); + - if (pud_none(pud)) + + pud = pud_offset(p4d, address); + + if (pud_none(*pud) || unlikely(pud_bad(*pud))) + goto out; + - if (pud_leaf(pud)) { + - lock = pud_lock(mm, pudp); + - if (!unlikely(pud_leaf(pud))) { + - spin_unlock(lock); + - goto retry; + - } + - pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud), + - pud_pfn(pud), PUD_MASK, pud_write(pud), + - pud_special(pud)); + - return 0; + - } + + - pmdp = pmd_offset(pudp, address); + - pmd = pmdp_get_lockless(pmdp); + - if (pmd_leaf(pmd)) { + - lock = pmd_lock(mm, pmdp); + - if (!unlikely(pmd_leaf(pmd))) { + - spin_unlock(lock); + - goto retry; + - } + - pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd), + - pmd_pfn(pmd), PMD_MASK, pmd_write(pmd), + - pmd_special(pmd)); + - return 0; + - } + + pmd = pmd_offset(pud, address); + + VM_BUG_ON(pmd_trans_huge(*pmd)); + + - ptep = pte_offset_map_lock(mm, pmdp, address, &lock); + + ptep = pte_offset_map_lock(mm, pmd, address, ptlp); + if (!ptep) + goto out; + - pte = ptep_get(ptep); + - if (!pte_present(pte)) + + if (!pte_present(ptep_get(ptep))) + goto unlock; + - pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte), + - pte_pfn(pte), PAGE_MASK, pte_write(pte), + - pte_special(pte)); + + *ptepp = ptep; + return 0; + unlock: + - pte_unmap_unlock(ptep, lock); + + pte_unmap_unlock(ptep, *ptlp); + out: + return -EINVAL; + } +* Unmerged path include/linux/mm.h +* Unmerged path mm/memory.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/6857be5f.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/6857be5f.failed new file mode 100644 index 0000000000000..865ebb964bd95 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/6857be5f.failed @@ -0,0 +1,246 @@ +mm: introduce ARCH_SUPPORTS_HUGE_PFNMAP and special bits to pmd/pud + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Peter Xu +commit 6857be5fecaebd9773ff27b6d29b6fff3b1abbce +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/6857be5f.failed + +Patch series "mm: Support huge pfnmaps", v2. + +Overview +======== + +This series implements huge pfnmaps support for mm in general. Huge +pfnmap allows e.g. 
VM_PFNMAP vmas to map in either PMD or PUD levels, +similar to what we do with dax / thp / hugetlb so far to benefit from TLB +hits. Now we extend that idea to PFN mappings, e.g. PCI MMIO bars where +it can grow as large as 8GB or even bigger. + +Currently, only x86_64 (1G+2M) and arm64 (2M) are supported. The last +patch (from Alex Williamson) will be the first user of huge pfnmap, so as +to enable vfio-pci driver to fault in huge pfn mappings. + +Implementation +============== + +In reality, it's relatively simple to add such support comparing to many +other types of mappings, because of PFNMAP's specialties when there's no +vmemmap backing it, so that most of the kernel routines on huge mappings +should simply already fail for them, like GUPs or old-school follow_page() +(which is recently rewritten to be folio_walk* APIs by David). + +One trick here is that we're still unmature on PUDs in generic paths here +and there, as DAX is so far the only user. This patchset will add the 2nd +user of it. Hugetlb can be a 3rd user if the hugetlb unification work can +go on smoothly, but to be discussed later. + +The other trick is how to allow gup-fast working for such huge mappings +even if there's no direct sign of knowing whether it's a normal page or +MMIO mapping. This series chose to keep the pte_special solution, so that +it reuses similar idea on setting a special bit to pfnmap PMDs/PUDs so +that gup-fast will be able to identify them and fail properly. + +Along the way, we'll also notice that the major pgtable pfn walker, aka, +follow_pte(), will need to retire soon due to the fact that it only works +with ptes. A new set of simple API is introduced (follow_pfnmap* API) to +be able to do whatever follow_pte() can already do, plus that it can also +process huge pfnmaps now. Half of this series is about that and +converting all existing pfnmap walkers to use the new API properly. +Hopefully the new API also looks better to avoid exposing e.g. pgtable +lock details into the callers, so that it can be used in an even more +straightforward way. + +Here, three more options will be introduced and involved in huge pfnmap: + + - ARCH_SUPPORTS_HUGE_PFNMAP + + Arch developers will need to select this option when huge pfnmap is + supported in arch's Kconfig. After this patchset applied, both x86_64 + and arm64 will start to enable it by default. + + - ARCH_SUPPORTS_PMD_PFNMAP / ARCH_SUPPORTS_PUD_PFNMAP + + These options are for driver developers to identify whether current + arch / config supports huge pfnmaps, making decision on whether it can + use the huge pfnmap APIs to inject them. One can refer to the last + vfio-pci patch from Alex on the use of them properly in a device + driver. + +So after the whole set applied, and if one would enable some dynamic debug +lines in vfio-pci core files, we should observe things like: + + vfio-pci 0000:00:06.0: vfio_pci_mmap_huge_fault(,order = 9) BAR 0 page offset 0x0: 0x100 + vfio-pci 0000:00:06.0: vfio_pci_mmap_huge_fault(,order = 9) BAR 0 page offset 0x200: 0x100 + vfio-pci 0000:00:06.0: vfio_pci_mmap_huge_fault(,order = 9) BAR 0 page offset 0x400: 0x100 + +In this specific case, it says that vfio-pci faults in PMDs properly for a +few BAR0 offsets. 
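For scale: order = 9 means each fault installs 2^9 = 512 base pages at once, i.e. a 2MB PMD mapping with 4KB pages, which is why the BAR 0 page offsets in the trace above advance in 0x200 (512-page) steps.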
+ +Patch Layout +============ + +Patch 1: Introduce the new options mentioned above for huge PFNMAPs +Patch 2: A tiny cleanup +Patch 3-8: Preparation patches for huge pfnmap (include introduce + special bit for pmd/pud) +Patch 9-16: Introduce follow_pfnmap*() API, use it everywhere, and + then drop follow_pte() API +Patch 17: Add huge pfnmap support for x86_64 +Patch 18: Add huge pfnmap support for arm64 +Patch 19: Add vfio-pci support for all kinds of huge pfnmaps (Alex) + +TODO +==== + +More architectures / More page sizes +------------------------------------ + +Currently only x86_64 (2M+1G) and arm64 (2M) are supported. There seems +to have plan to support arm64 1G later on top of this series [2]. + +Any arch will need to first support THP / THP_1G, then provide a special +bit in pmds/puds to support huge pfnmaps. + +remap_pfn_range() support +------------------------- + +Currently, remap_pfn_range() still only maps PTEs. With the new option, +remap_pfn_range() can logically start to inject either PMDs or PUDs when +the alignment requirements match on the VAs. + +When the support is there, it should be able to silently benefit all +drivers that is using remap_pfn_range() in its mmap() handler on better +TLB hit rate and overall faster MMIO accesses similar to processor on +hugepages. + +More driver support +------------------- + +VFIO is so far the only consumer for the huge pfnmaps after this series +applied. Besides above remap_pfn_range() generic optimization, device +driver can also try to optimize its mmap() on a better VA alignment for +either PMD/PUD sizes. This may, iiuc, normally require userspace changes, +as the driver doesn't normally decide the VA to map a bar. But I don't +think I know all the drivers to know the full picture. + +Credits all go to Alex on help testing the GPU/NIC use cases above. + +[0] https://lore.kernel.org/r/73ad9540-3fb8-4154-9a4f-30a0a2b03d41@lucifer.local +[1] https://lore.kernel.org/r/20240807194812.819412-1-peterx@redhat.com +[2] https://lore.kernel.org/r/498e0731-81a4-4f75-95b4-a8ad0bcc7665@huawei.com + + +This patch (of 19): + +This patch introduces the option to introduce special pte bit into +pmd/puds. Archs can start to define pmd_special / pud_special when +supported by selecting the new option. Per-arch support will be added +later. + +Before that, create fallbacks for these helpers so that they are always +available. 
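To make the per-level options concrete, a fault handler that wants to exploit them would typically dispatch on the fault order, along the lines of the vfio-pci handler shown further down in this series. Sketch only; order, pfn, vma, vmf and ret are assumed to be set up by the surrounding driver code:

	switch (order) {
	case 0:
		ret = vmf_insert_pfn(vma, vmf->address, pfn);
		break;
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
	case PMD_ORDER:
		/* only compiled in when the arch selects PMD-level pfnmaps */
		ret = vmf_insert_pfn_pmd(vmf, __pfn_to_pfn_t(pfn, PFN_DEV), false);
		break;
#endif
#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
	case PUD_ORDER:
		ret = vmf_insert_pfn_pud(vmf, __pfn_to_pfn_t(pfn, PFN_DEV), false);
		break;
#endif
	default:
		ret = VM_FAULT_FALLBACK;
	}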
+ +Link: https://lkml.kernel.org/r/20240826204353.2228736-1-peterx@redhat.com +Link: https://lkml.kernel.org/r/20240826204353.2228736-2-peterx@redhat.com + Signed-off-by: Peter Xu + Cc: Alexander Gordeev + Cc: Alex Williamson + Cc: Aneesh Kumar K.V + Cc: Borislav Petkov + Cc: Catalin Marinas + Cc: Christian Borntraeger + Cc: Dave Hansen + Cc: David Hildenbrand + Cc: Gavin Shan + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Ingo Molnar + Cc: Jason Gunthorpe + Cc: Matthew Wilcox + Cc: Niklas Schnelle + Cc: Paolo Bonzini + Cc: Ryan Roberts + Cc: Sean Christopherson + Cc: Sven Schnelle + Cc: Thomas Gleixner + Cc: Vasily Gorbik + Cc: Will Deacon + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit 6857be5fecaebd9773ff27b6d29b6fff3b1abbce) + Signed-off-by: Jonathan Maple + +# Conflicts: +# mm/Kconfig +diff --cc mm/Kconfig +index a91823e31f45,1aa282e35dc7..000000000000 +--- a/mm/Kconfig ++++ b/mm/Kconfig +@@@ -898,6 -870,25 +898,28 @@@ config READ_ONLY_THP_FOR_F + endif # TRANSPARENT_HUGEPAGE + + # +++<<<<<<< HEAD +++======= ++ # The architecture supports pgtable leaves that is larger than PAGE_SIZE ++ # ++ config PGTABLE_HAS_HUGE_LEAVES ++ def_bool TRANSPARENT_HUGEPAGE || HUGETLB_PAGE ++ ++ # TODO: Allow to be enabled without THP ++ config ARCH_SUPPORTS_HUGE_PFNMAP ++ def_bool n ++ depends on TRANSPARENT_HUGEPAGE ++ ++ config ARCH_SUPPORTS_PMD_PFNMAP ++ def_bool y ++ depends on ARCH_SUPPORTS_HUGE_PFNMAP && HAVE_ARCH_TRANSPARENT_HUGEPAGE ++ ++ config ARCH_SUPPORTS_PUD_PFNMAP ++ def_bool y ++ depends on ARCH_SUPPORTS_HUGE_PFNMAP && HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD ++ ++ # +++>>>>>>> 6857be5fecae (mm: introduce ARCH_SUPPORTS_HUGE_PFNMAP and special bits to pmd/pud) + # UP and nommu archs use km based percpu allocator + # + config NEED_PER_CPU_KM +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 196c481ec160..7b6f347d05b9 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -2730,6 +2730,30 @@ static inline pte_t pte_mkspecial(pte_t pte) + } + #endif + ++#ifndef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP ++static inline bool pmd_special(pmd_t pmd) ++{ ++ return false; ++} ++ ++static inline pmd_t pmd_mkspecial(pmd_t pmd) ++{ ++ return pmd; ++} ++#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */ ++ ++#ifndef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP ++static inline bool pud_special(pud_t pud) ++{ ++ return false; ++} ++ ++static inline pud_t pud_mkspecial(pud_t pud) ++{ ++ return pud; ++} ++#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ ++ + #ifndef CONFIG_ARCH_HAS_PTE_DEVMAP + static inline int pte_devmap(pte_t pte) + { +* Unmerged path mm/Kconfig diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/6da8e963.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/6da8e963.failed new file mode 100644 index 0000000000000..82e01ae0d49b8 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/6da8e963.failed @@ -0,0 +1,316 @@ +mm: new follow_pfnmap API + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Peter Xu +commit 6da8e9634bb7e3fdad9ae0e4db873a05036c4343 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/6da8e963.failed + +Introduce a pair of APIs to follow pfn mappings to get entry information. +It's very similar to what follow_pte() does before, but different in that +it recognizes huge pfn mappings. 
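In practice the pair is used bracket-style: fill in the two inputs, call follow_pfnmap_start(), consume the outputs while the lock it took is still held, then call follow_pfnmap_end(). A minimal sketch, mirroring the call sites converted later in the series (lookup_pfnmap_pfn() is a made-up helper name and the error code is illustrative):

	static int lookup_pfnmap_pfn(struct vm_area_struct *vma,
				     unsigned long vaddr, unsigned long *pfn)
	{
		struct follow_pfnmap_args args = {
			.vma = vma,		/* input: target VM_IO/VM_PFNMAP vma */
			.address = vaddr,	/* input: user virtual address */
		};

		if (follow_pfnmap_start(&args))
			return -EFAULT;
		/* args.pfn/pgprot/writable/special are valid only until end() */
		*pfn = args.pfn;
		follow_pfnmap_end(&args);
		return 0;
	}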
+ +Link: https://lkml.kernel.org/r/20240826204353.2228736-10-peterx@redhat.com + Signed-off-by: Peter Xu + Cc: Alexander Gordeev + Cc: Alex Williamson + Cc: Aneesh Kumar K.V + Cc: Borislav Petkov + Cc: Catalin Marinas + Cc: Christian Borntraeger + Cc: Dave Hansen + Cc: David Hildenbrand + Cc: Gavin Shan + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Ingo Molnar + Cc: Jason Gunthorpe + Cc: Matthew Wilcox + Cc: Niklas Schnelle + Cc: Paolo Bonzini + Cc: Ryan Roberts + Cc: Sean Christopherson + Cc: Sven Schnelle + Cc: Thomas Gleixner + Cc: Vasily Gorbik + Cc: Will Deacon + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit 6da8e9634bb7e3fdad9ae0e4db873a05036c4343) + Signed-off-by: Jonathan Maple + +# Conflicts: +# mm/memory.c +diff --cc mm/memory.c +index e2794e3b8919,3878bf69bc14..000000000000 +--- a/mm/memory.c ++++ b/mm/memory.c +@@@ -5666,71 -6172,157 +5666,223 @@@ out + } + EXPORT_SYMBOL_GPL(follow_pte); + +++<<<<<<< HEAD + +/** + + * follow_pfn - look up PFN at a user virtual address + + * @vma: memory mapping + + * @address: user virtual address + + * @pfn: location to store found PFN + + * + + * Only IO mappings and raw PFN mappings are allowed. + + * + + * This function does not allow the caller to read the permissions + + * of the PTE. Do not use it. + + * + + * Return: zero and the pfn at @pfn on success, -ve otherwise. + + */ + +int follow_pfn(struct vm_area_struct *vma, unsigned long address, + + unsigned long *pfn) + +{ + + int ret = -EINVAL; + + spinlock_t *ptl; + + pte_t *ptep; + + + + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + + return ret; + + + + ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); + + if (ret) + + return ret; + + *pfn = pte_pfn(ptep_get(ptep)); + + pte_unmap_unlock(ptep, ptl); + + return 0; + +} + +EXPORT_SYMBOL(follow_pfn); +++======= ++ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args, ++ spinlock_t *lock, pte_t *ptep, ++ pgprot_t pgprot, unsigned long pfn_base, ++ unsigned long addr_mask, bool writable, ++ bool special) ++ { ++ args->lock = lock; ++ args->ptep = ptep; ++ args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT); ++ args->pgprot = pgprot; ++ args->writable = writable; ++ args->special = special; ++ } ++ ++ static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma) ++ { ++ #ifdef CONFIG_LOCKDEP ++ struct address_space *mapping = vma->vm_file->f_mapping; ++ ++ if (mapping) ++ lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) || ++ lockdep_is_held(&vma->vm_mm->mmap_lock)); ++ else ++ lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock)); ++ #endif ++ } ++ ++ /** ++ * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address ++ * @args: Pointer to struct @follow_pfnmap_args ++ * ++ * The caller needs to setup args->vma and args->address to point to the ++ * virtual address as the target of such lookup. On a successful return, ++ * the results will be put into other output fields. ++ * ++ * After the caller finished using the fields, the caller must invoke ++ * another follow_pfnmap_end() to proper releases the locks and resources ++ * of such look up request. ++ * ++ * During the start() and end() calls, the results in @args will be valid ++ * as proper locks will be held. After the end() is called, all the fields ++ * in @follow_pfnmap_args will be invalid to be further accessed. 
Further ++ * use of such information after end() may require proper synchronizations ++ * by the caller with page table updates, otherwise it can create a ++ * security bug. ++ * ++ * If the PTE maps a refcounted page, callers are responsible to protect ++ * against invalidation with MMU notifiers; otherwise access to the PFN at ++ * a later point in time can trigger use-after-free. ++ * ++ * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore ++ * should be taken for read, and the mmap semaphore cannot be released ++ * before the end() is invoked. ++ * ++ * This function must not be used to modify PTE content. ++ * ++ * Return: zero on success, negative otherwise. ++ */ ++ int follow_pfnmap_start(struct follow_pfnmap_args *args) ++ { ++ struct vm_area_struct *vma = args->vma; ++ unsigned long address = args->address; ++ struct mm_struct *mm = vma->vm_mm; ++ spinlock_t *lock; ++ pgd_t *pgdp; ++ p4d_t *p4dp, p4d; ++ pud_t *pudp, pud; ++ pmd_t *pmdp, pmd; ++ pte_t *ptep, pte; ++ ++ pfnmap_lockdep_assert(vma); ++ ++ if (unlikely(address < vma->vm_start || address >= vma->vm_end)) ++ goto out; ++ ++ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) ++ goto out; ++ retry: ++ pgdp = pgd_offset(mm, address); ++ if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp))) ++ goto out; ++ ++ p4dp = p4d_offset(pgdp, address); ++ p4d = READ_ONCE(*p4dp); ++ if (p4d_none(p4d) || unlikely(p4d_bad(p4d))) ++ goto out; ++ ++ pudp = pud_offset(p4dp, address); ++ pud = READ_ONCE(*pudp); ++ if (pud_none(pud)) ++ goto out; ++ if (pud_leaf(pud)) { ++ lock = pud_lock(mm, pudp); ++ if (!unlikely(pud_leaf(pud))) { ++ spin_unlock(lock); ++ goto retry; ++ } ++ pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud), ++ pud_pfn(pud), PUD_MASK, pud_write(pud), ++ pud_special(pud)); ++ return 0; ++ } ++ ++ pmdp = pmd_offset(pudp, address); ++ pmd = pmdp_get_lockless(pmdp); ++ if (pmd_leaf(pmd)) { ++ lock = pmd_lock(mm, pmdp); ++ if (!unlikely(pmd_leaf(pmd))) { ++ spin_unlock(lock); ++ goto retry; ++ } ++ pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd), ++ pmd_pfn(pmd), PMD_MASK, pmd_write(pmd), ++ pmd_special(pmd)); ++ return 0; ++ } ++ ++ ptep = pte_offset_map_lock(mm, pmdp, address, &lock); ++ if (!ptep) ++ goto out; ++ pte = ptep_get(ptep); ++ if (!pte_present(pte)) ++ goto unlock; ++ pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte), ++ pte_pfn(pte), PAGE_MASK, pte_write(pte), ++ pte_special(pte)); ++ return 0; ++ unlock: ++ pte_unmap_unlock(ptep, lock); ++ out: ++ return -EINVAL; ++ } ++ EXPORT_SYMBOL_GPL(follow_pfnmap_start); ++ ++ /** ++ * follow_pfnmap_end(): End a follow_pfnmap_start() process ++ * @args: Pointer to struct @follow_pfnmap_args ++ * ++ * Must be used in pair of follow_pfnmap_start(). See the start() function ++ * above for more information. ++ */ ++ void follow_pfnmap_end(struct follow_pfnmap_args *args) ++ { ++ if (args->lock) ++ spin_unlock(args->lock); ++ if (args->ptep) ++ pte_unmap(args->ptep); ++ } ++ EXPORT_SYMBOL_GPL(follow_pfnmap_end); +++>>>>>>> 6da8e9634bb7 (mm: new follow_pfnmap API) + + #ifdef CONFIG_HAVE_IOREMAP_PROT + +int follow_phys(struct vm_area_struct *vma, + + unsigned long address, unsigned int flags, + + unsigned long *prot, resource_size_t *phys) + +{ + + int ret = -EINVAL; + + pte_t *ptep, pte; + + spinlock_t *ptl; + + + + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + + goto out; + + + + if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) + + goto out; + + pte = ptep_get(ptep); + + + + /* Never return PFNs of anon folios in COW mappings. 
*/ + + if (vm_normal_folio(vma, address, pte)) + + goto unlock; + + + + if ((flags & FOLL_WRITE) && !pte_write(pte)) + + goto unlock; + + + + *prot = pgprot_val(pte_pgprot(pte)); + + *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; + + + + ret = 0; + +unlock: + + pte_unmap_unlock(ptep, ptl); + +out: + + return ret; + +} + + + /** + * generic_access_phys - generic implementation for iomem mmap access + * @vma: the vma to access +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 196c481ec160..51f28b4e78fc 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -2436,6 +2436,37 @@ int follow_phys(struct vm_area_struct *vma, unsigned long address, + int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write); + ++struct follow_pfnmap_args { ++ /** ++ * Inputs: ++ * @vma: Pointer to @vm_area_struct struct ++ * @address: the virtual address to walk ++ */ ++ struct vm_area_struct *vma; ++ unsigned long address; ++ /** ++ * Internals: ++ * ++ * The caller shouldn't touch any of these. ++ */ ++ spinlock_t *lock; ++ pte_t *ptep; ++ /** ++ * Outputs: ++ * ++ * @pfn: the PFN of the address ++ * @pgprot: the pgprot_t of the mapping ++ * @writable: whether the mapping is writable ++ * @special: whether the mapping is a special mapping (real PFN maps) ++ */ ++ unsigned long pfn; ++ pgprot_t pgprot; ++ bool writable; ++ bool special; ++}; ++int follow_pfnmap_start(struct follow_pfnmap_args *args); ++void follow_pfnmap_end(struct follow_pfnmap_args *args); ++ + extern void truncate_pagecache(struct inode *inode, loff_t new); + extern void truncate_setsize(struct inode *inode, loff_t newsize); + void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); +* Unmerged path mm/memory.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/75182022.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/75182022.failed new file mode 100644 index 0000000000000..294d2eff42b30 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/75182022.failed @@ -0,0 +1,179 @@ +mm/x86: support large pfn mappings + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Peter Xu +commit 75182022a0439788415b2dd1db3086e07aa506f7 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/75182022.failed + +Helpers to install and detect special pmd/pud entries. In short, bit 9 on +x86 is not used for pmd/pud, so we can directly define them the same as +the pte level. One note is that it's also used in _PAGE_BIT_CPA_TEST but +that is only used in the debug test, and shouldn't conflict in this case. + +One note is that pxx_set|clear_flags() for pmd/pud will need to be moved +upper so that they can be referenced by the new special bit helpers. +There's no change in the code that was moved. 
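On x86, bit 9 is _PAGE_BIT_SPECIAL (an alias of _PAGE_BIT_SOFTW1), so the helpers added below simply test and set _PAGE_SPECIAL on pmd/pud values, mirroring what pte_special()/pte_mkspecial() already do at the PTE level.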
+ +Link: https://lkml.kernel.org/r/20240826204353.2228736-18-peterx@redhat.com + Signed-off-by: Peter Xu + Cc: Thomas Gleixner + Cc: Ingo Molnar + Cc: Borislav Petkov + Cc: Dave Hansen + Cc: Alexander Gordeev + Cc: Alex Williamson + Cc: Aneesh Kumar K.V + Cc: Catalin Marinas + Cc: Christian Borntraeger + Cc: David Hildenbrand + Cc: Gavin Shan + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Jason Gunthorpe + Cc: Matthew Wilcox + Cc: Niklas Schnelle + Cc: Paolo Bonzini + Cc: Ryan Roberts + Cc: Sean Christopherson + Cc: Sven Schnelle + Cc: Vasily Gorbik + Cc: Will Deacon + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit 75182022a0439788415b2dd1db3086e07aa506f7) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/x86/Kconfig +diff --cc arch/x86/Kconfig +index a5a59118efe4,d4dbe9717e96..000000000000 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@@ -28,7 -28,7 +28,11 @@@ config X86_6 + select ARCH_HAS_GIGANTIC_PAGE + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 + select ARCH_SUPPORTS_PER_VMA_LOCK +++<<<<<<< HEAD + + select ARCH_SUPPORTS_RT +++======= ++ select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE +++>>>>>>> 75182022a043 (mm/x86: support large pfn mappings) + select HAVE_ARCH_SOFT_DIRTY + select MODULES_USE_ELF_RELA + select NEED_DMA_MAP_STATE +* Unmerged path arch/x86/Kconfig +diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +index 8149afec43a4..c5bc120fade8 100644 +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -121,6 +121,34 @@ extern pmdval_t early_pmd_flags; + #define arch_end_context_switch(prev) do {} while(0) + #endif /* CONFIG_PARAVIRT_XXL */ + ++static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) ++{ ++ pmdval_t v = native_pmd_val(pmd); ++ ++ return native_make_pmd(v | set); ++} ++ ++static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) ++{ ++ pmdval_t v = native_pmd_val(pmd); ++ ++ return native_make_pmd(v & ~clear); ++} ++ ++static inline pud_t pud_set_flags(pud_t pud, pudval_t set) ++{ ++ pudval_t v = native_pud_val(pud); ++ ++ return native_make_pud(v | set); ++} ++ ++static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) ++{ ++ pudval_t v = native_pud_val(pud); ++ ++ return native_make_pud(v & ~clear); ++} ++ + /* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. 
+@@ -310,6 +338,30 @@ static inline int pud_devmap(pud_t pud) + } + #endif + ++#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP ++static inline bool pmd_special(pmd_t pmd) ++{ ++ return pmd_flags(pmd) & _PAGE_SPECIAL; ++} ++ ++static inline pmd_t pmd_mkspecial(pmd_t pmd) ++{ ++ return pmd_set_flags(pmd, _PAGE_SPECIAL); ++} ++#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */ ++ ++#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP ++static inline bool pud_special(pud_t pud) ++{ ++ return pud_flags(pud) & _PAGE_SPECIAL; ++} ++ ++static inline pud_t pud_mkspecial(pud_t pud) ++{ ++ return pud_set_flags(pud, _PAGE_SPECIAL); ++} ++#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ ++ + static inline int pgd_devmap(pgd_t pgd) + { + return 0; +@@ -480,20 +532,6 @@ static inline pte_t pte_mkdevmap(pte_t pte) + return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP); + } + +-static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) +-{ +- pmdval_t v = native_pmd_val(pmd); +- +- return native_make_pmd(v | set); +-} +- +-static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) +-{ +- pmdval_t v = native_pmd_val(pmd); +- +- return native_make_pmd(v & ~clear); +-} +- + /* See comments above mksaveddirty_shift() */ + static inline pmd_t pmd_mksaveddirty(pmd_t pmd) + { +@@ -588,20 +626,6 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) + pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); + #define pmd_mkwrite pmd_mkwrite + +-static inline pud_t pud_set_flags(pud_t pud, pudval_t set) +-{ +- pudval_t v = native_pud_val(pud); +- +- return native_make_pud(v | set); +-} +- +-static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) +-{ +- pudval_t v = native_pud_val(pud); +- +- return native_make_pud(v & ~clear); +-} +- + /* See comments above mksaveddirty_shift() */ + static inline pud_t pud_mksaveddirty(pud_t pud) + { diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/a77f9489.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/a77f9489.failed new file mode 100644 index 0000000000000..d44a440ac766d --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/a77f9489.failed @@ -0,0 +1,78 @@ +vfio: use the new follow_pfnmap API + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Peter Xu +commit a77f9489f1d7873a56e1d6640cc0c4865f64176b +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/a77f9489.failed + +Use the new API that can understand huge pfn mappings. 
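The conflict below only captures the two follow_pfnmap_start() call sites; the consumer side of the converted helper (not shown here) would then read the outputs instead of dereferencing a PTE, roughly as in this sketch (args, write_fault, pfn and ret as in the hunk):

	if (write_fault && !args.writable)
		ret = -EFAULT;
	else
		*pfn = args.pfn;

	follow_pfnmap_end(&args);
	return ret;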
+ +Link: https://lkml.kernel.org/r/20240826204353.2228736-14-peterx@redhat.com + Signed-off-by: Peter Xu + Cc: Alex Williamson + Cc: Jason Gunthorpe + Cc: Alexander Gordeev + Cc: Aneesh Kumar K.V + Cc: Borislav Petkov + Cc: Catalin Marinas + Cc: Christian Borntraeger + Cc: Dave Hansen + Cc: David Hildenbrand + Cc: Gavin Shan + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Ingo Molnar + Cc: Matthew Wilcox + Cc: Niklas Schnelle + Cc: Paolo Bonzini + Cc: Ryan Roberts + Cc: Sean Christopherson + Cc: Sven Schnelle + Cc: Thomas Gleixner + Cc: Vasily Gorbik + Cc: Will Deacon + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit a77f9489f1d7873a56e1d6640cc0c4865f64176b) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/vfio/vfio_iommu_type1.c +diff --cc drivers/vfio/vfio_iommu_type1.c +index 6c6586af7953,bf391b40e576..000000000000 +--- a/drivers/vfio/vfio_iommu_type1.c ++++ b/drivers/vfio/vfio_iommu_type1.c +@@@ -515,12 -513,10 +515,14 @@@ static int follow_fault_pfn(struct vm_a + unsigned long vaddr, unsigned long *pfn, + bool write_fault) + { +- pte_t *ptep; +- pte_t pte; +- spinlock_t *ptl; ++ struct follow_pfnmap_args args = { .vma = vma, .address = vaddr }; + int ret; + +++<<<<<<< HEAD + + ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); +++======= ++ ret = follow_pfnmap_start(&args); +++>>>>>>> a77f9489f1d7 (vfio: use the new follow_pfnmap API) + if (ret) { + bool unlocked = false; + +@@@ -534,7 -530,7 +536,11 @@@ + if (ret) + return ret; + +++<<<<<<< HEAD + + ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); +++======= ++ ret = follow_pfnmap_start(&args); +++>>>>>>> a77f9489f1d7 (vfio: use the new follow_pfnmap API) + if (ret) + return ret; + } +* Unmerged path drivers/vfio/vfio_iommu_type1.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/b0a1c0d0.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/b0a1c0d0.failed new file mode 100644 index 0000000000000..2727b419cfefb --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/b0a1c0d0.failed @@ -0,0 +1,335 @@ +mm: remove follow_pte() + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Peter Xu +commit b0a1c0d0edcd75a0f8ec5fd19dbd64b8d097f534 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/b0a1c0d0.failed + +follow_pte() users have been converted to follow_pfnmap*(). Remove the +API. 
+ +Link: https://lkml.kernel.org/r/20240826204353.2228736-17-peterx@redhat.com + Signed-off-by: Peter Xu + Cc: Alexander Gordeev + Cc: Alex Williamson + Cc: Aneesh Kumar K.V + Cc: Borislav Petkov + Cc: Catalin Marinas + Cc: Christian Borntraeger + Cc: Dave Hansen + Cc: David Hildenbrand + Cc: Gavin Shan + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Ingo Molnar + Cc: Jason Gunthorpe + Cc: Matthew Wilcox + Cc: Niklas Schnelle + Cc: Paolo Bonzini + Cc: Ryan Roberts + Cc: Sean Christopherson + Cc: Sven Schnelle + Cc: Thomas Gleixner + Cc: Vasily Gorbik + Cc: Will Deacon + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit b0a1c0d0edcd75a0f8ec5fd19dbd64b8d097f534) + Signed-off-by: Jonathan Maple + +# Conflicts: +# include/linux/mm.h +# mm/memory.c +diff --cc include/linux/mm.h +index 196c481ec160,d750be768121..000000000000 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@@ -2427,12 -2368,6 +2427,15 @@@ void free_pgd_range(struct mmu_gather * + unsigned long end, unsigned long floor, unsigned long ceiling); + int + copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); +++<<<<<<< HEAD + +int follow_pte(struct mm_struct *mm, unsigned long address, + + pte_t **ptepp, spinlock_t **ptlp); + +int follow_pfn(struct vm_area_struct *vma, unsigned long address, + + unsigned long *pfn); + +int follow_phys(struct vm_area_struct *vma, unsigned long address, + + unsigned int flags, unsigned long *prot, resource_size_t *phys); +++======= +++>>>>>>> b0a1c0d0edcd (mm: remove follow_pte()) + int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write); + +diff --cc mm/memory.c +index e2794e3b8919,42674c0748cb..000000000000 +--- a/mm/memory.c ++++ b/mm/memory.c +@@@ -5607,130 -6099,157 +5607,159 @@@ int __pmd_alloc(struct mm_struct *mm, p + } + #endif /* __PAGETABLE_PMD_FOLDED */ + +++<<<<<<< HEAD + +/** + + * follow_pte - look up PTE at a user virtual address + + * @mm: the mm_struct of the target address space + + * @address: user virtual address + + * @ptepp: location to store found PTE + + * @ptlp: location to store the lock for the PTE + + * + + * On a successful return, the pointer to the PTE is stored in @ptepp; + + * the corresponding lock is taken and its location is stored in @ptlp. + + * The contents of the PTE are only stable until @ptlp is released; + + * any further use, if any, must be protected against invalidation + + * with MMU notifiers. + + * + + * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore + + * should be taken for read. + + * + + * KVM uses this function. While it is arguably less bad than ``follow_pfn``, + + * it is not a good general-purpose API. + + * + + * Return: zero on success, -ve otherwise. 
+ + */ + +int follow_pte(struct mm_struct *mm, unsigned long address, + + pte_t **ptepp, spinlock_t **ptlp) + +{ + + pgd_t *pgd; + + p4d_t *p4d; + + pud_t *pud; + + pmd_t *pmd; + + pte_t *ptep; + + + + pgd = pgd_offset(mm, address); + + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + + goto out; + + + + p4d = p4d_offset(pgd, address); + + if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) + + goto out; + + + + pud = pud_offset(p4d, address); + + if (pud_none(*pud) || unlikely(pud_bad(*pud))) + + goto out; + + + + pmd = pmd_offset(pud, address); + + VM_BUG_ON(pmd_trans_huge(*pmd)); + + + + ptep = pte_offset_map_lock(mm, pmd, address, ptlp); + + if (!ptep) + + goto out; + + if (!pte_present(ptep_get(ptep))) + + goto unlock; + + *ptepp = ptep; + + return 0; + +unlock: + + pte_unmap_unlock(ptep, *ptlp); + +out: + + return -EINVAL; + +} + +EXPORT_SYMBOL_GPL(follow_pte); +++======= ++ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args, ++ spinlock_t *lock, pte_t *ptep, ++ pgprot_t pgprot, unsigned long pfn_base, ++ unsigned long addr_mask, bool writable, ++ bool special) ++ { ++ args->lock = lock; ++ args->ptep = ptep; ++ args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT); ++ args->pgprot = pgprot; ++ args->writable = writable; ++ args->special = special; ++ } ++ ++ static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma) ++ { ++ #ifdef CONFIG_LOCKDEP ++ struct address_space *mapping = vma->vm_file->f_mapping; ++ ++ if (mapping) ++ lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) || ++ lockdep_is_held(&vma->vm_mm->mmap_lock)); ++ else ++ lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock)); ++ #endif ++ } +++>>>>>>> b0a1c0d0edcd (mm: remove follow_pte()) + + /** + - * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address + - * @args: Pointer to struct @follow_pfnmap_args + + * follow_pfn - look up PFN at a user virtual address + + * @vma: memory mapping + + * @address: user virtual address + + * @pfn: location to store found PFN + * + - * The caller needs to setup args->vma and args->address to point to the + - * virtual address as the target of such lookup. On a successful return, + - * the results will be put into other output fields. + + * Only IO mappings and raw PFN mappings are allowed. + * + - * After the caller finished using the fields, the caller must invoke + - * another follow_pfnmap_end() to proper releases the locks and resources + - * of such look up request. + + * This function does not allow the caller to read the permissions + + * of the PTE. Do not use it. + * + - * During the start() and end() calls, the results in @args will be valid + - * as proper locks will be held. After the end() is called, all the fields + - * in @follow_pfnmap_args will be invalid to be further accessed. Further + - * use of such information after end() may require proper synchronizations + - * by the caller with page table updates, otherwise it can create a + - * security bug. + - * + - * If the PTE maps a refcounted page, callers are responsible to protect + - * against invalidation with MMU notifiers; otherwise access to the PFN at + - * a later point in time can trigger use-after-free. + - * + - * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore + - * should be taken for read, and the mmap semaphore cannot be released + - * before the end() is invoked. + - * + - * This function must not be used to modify PTE content. + - * + - * Return: zero on success, negative otherwise. 
+ + * Return: zero and the pfn at @pfn on success, -ve otherwise. + */ + -int follow_pfnmap_start(struct follow_pfnmap_args *args) + +int follow_pfn(struct vm_area_struct *vma, unsigned long address, + + unsigned long *pfn) + { + - struct vm_area_struct *vma = args->vma; + - unsigned long address = args->address; + - struct mm_struct *mm = vma->vm_mm; + - spinlock_t *lock; + - pgd_t *pgdp; + - p4d_t *p4dp, p4d; + - pud_t *pudp, pud; + - pmd_t *pmdp, pmd; + - pte_t *ptep, pte; + + int ret = -EINVAL; + + spinlock_t *ptl; + + pte_t *ptep; + + - pfnmap_lockdep_assert(vma); + + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + + return ret; + + - if (unlikely(address < vma->vm_start || address >= vma->vm_end)) + - goto out; + + ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); + + if (ret) + + return ret; + + *pfn = pte_pfn(ptep_get(ptep)); + + pte_unmap_unlock(ptep, ptl); + + return 0; + +} + +EXPORT_SYMBOL(follow_pfn); + + - if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + - goto out; + -retry: + - pgdp = pgd_offset(mm, address); + - if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp))) + - goto out; + +#ifdef CONFIG_HAVE_IOREMAP_PROT + +int follow_phys(struct vm_area_struct *vma, + + unsigned long address, unsigned int flags, + + unsigned long *prot, resource_size_t *phys) + +{ + + int ret = -EINVAL; + + pte_t *ptep, pte; + + spinlock_t *ptl; + + - p4dp = p4d_offset(pgdp, address); + - p4d = READ_ONCE(*p4dp); + - if (p4d_none(p4d) || unlikely(p4d_bad(p4d))) + + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + goto out; + + - pudp = pud_offset(p4dp, address); + - pud = READ_ONCE(*pudp); + - if (pud_none(pud)) + + if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) + goto out; + - if (pud_leaf(pud)) { + - lock = pud_lock(mm, pudp); + - if (!unlikely(pud_leaf(pud))) { + - spin_unlock(lock); + - goto retry; + - } + - pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud), + - pud_pfn(pud), PUD_MASK, pud_write(pud), + - pud_special(pud)); + - return 0; + - } + + pte = ptep_get(ptep); + + - pmdp = pmd_offset(pudp, address); + - pmd = pmdp_get_lockless(pmdp); + - if (pmd_leaf(pmd)) { + - lock = pmd_lock(mm, pmdp); + - if (!unlikely(pmd_leaf(pmd))) { + - spin_unlock(lock); + - goto retry; + - } + - pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd), + - pmd_pfn(pmd), PMD_MASK, pmd_write(pmd), + - pmd_special(pmd)); + - return 0; + - } + + /* Never return PFNs of anon folios in COW mappings. */ + + if (vm_normal_folio(vma, address, pte)) + + goto unlock; + + - ptep = pte_offset_map_lock(mm, pmdp, address, &lock); + - if (!ptep) + - goto out; + - pte = ptep_get(ptep); + - if (!pte_present(pte)) + + if ((flags & FOLL_WRITE) && !pte_write(pte)) + goto unlock; + - pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte), + - pte_pfn(pte), PAGE_MASK, pte_write(pte), + - pte_special(pte)); + - return 0; + + + + *prot = pgprot_val(pte_pgprot(pte)); + + *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; + + + + ret = 0; + unlock: + - pte_unmap_unlock(ptep, lock); + + pte_unmap_unlock(ptep, ptl); + out: + - return -EINVAL; + -} + -EXPORT_SYMBOL_GPL(follow_pfnmap_start); + - + -/** + - * follow_pfnmap_end(): End a follow_pfnmap_start() process + - * @args: Pointer to struct @follow_pfnmap_args + - * + - * Must be used in pair of follow_pfnmap_start(). See the start() function + - * above for more information. 
+ - */ + -void follow_pfnmap_end(struct follow_pfnmap_args *args) + -{ + - if (args->lock) + - spin_unlock(args->lock); + - if (args->ptep) + - pte_unmap(args->ptep); + + return ret; + } + -EXPORT_SYMBOL_GPL(follow_pfnmap_end); + + -#ifdef CONFIG_HAVE_IOREMAP_PROT + /** + * generic_access_phys - generic implementation for iomem mmap access + * @vma: the vma to access +* Unmerged path include/linux/mm.h +* Unmerged path mm/memory.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/b17269a5.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/b17269a5.failed new file mode 100644 index 0000000000000..25f21aaca81ce --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/b17269a5.failed @@ -0,0 +1,105 @@ +mm/access_process_vm: use the new follow_pfnmap API + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Peter Xu +commit b17269a51cc7f046a6f2cf9a6c314a0de885e5a5 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/b17269a5.failed + +Use the new API that can understand huge pfn mappings. + +Link: https://lkml.kernel.org/r/20240826204353.2228736-16-peterx@redhat.com + Signed-off-by: Peter Xu + Cc: Alexander Gordeev + Cc: Alex Williamson + Cc: Aneesh Kumar K.V + Cc: Borislav Petkov + Cc: Catalin Marinas + Cc: Christian Borntraeger + Cc: Dave Hansen + Cc: David Hildenbrand + Cc: Gavin Shan + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Ingo Molnar + Cc: Jason Gunthorpe + Cc: Matthew Wilcox + Cc: Niklas Schnelle + Cc: Paolo Bonzini + Cc: Ryan Roberts + Cc: Sean Christopherson + Cc: Sven Schnelle + Cc: Thomas Gleixner + Cc: Vasily Gorbik + Cc: Will Deacon + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit b17269a51cc7f046a6f2cf9a6c314a0de885e5a5) + Signed-off-by: Jonathan Maple + +# Conflicts: +# mm/memory.c +diff --cc mm/memory.c +index e2794e3b8919,cfc278691466..000000000000 +--- a/mm/memory.c ++++ b/mm/memory.c +@@@ -5749,37 -6341,34 +5749,45 @@@ int generic_access_phys(struct vm_area_ + resource_size_t phys_addr; + unsigned long prot = 0; + void __iomem *maddr; +- pte_t *ptep, pte; +- spinlock_t *ptl; + int offset = offset_in_page(addr); + int ret = -EINVAL; ++ bool writable; ++ struct follow_pfnmap_args args = { .vma = vma, .address = addr }; + + + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + + return -EINVAL; + + + retry: +++<<<<<<< HEAD + + if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) +++======= ++ if (follow_pfnmap_start(&args)) +++>>>>>>> b17269a51cc7 (mm/access_process_vm: use the new follow_pfnmap API) + return -EINVAL; +- pte = ptep_get(ptep); +- pte_unmap_unlock(ptep, ptl); ++ prot = pgprot_val(args.pgprot); ++ phys_addr = (resource_size_t)args.pfn << PAGE_SHIFT; ++ writable = args.writable; ++ follow_pfnmap_end(&args); + +- prot = pgprot_val(pte_pgprot(pte)); +- phys_addr = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; +- +- if ((write & FOLL_WRITE) && !pte_write(pte)) ++ if ((write & FOLL_WRITE) && !writable) + return -EINVAL; + + maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); + if (!maddr) + return -ENOMEM; + +++<<<<<<< HEAD + + if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) +++======= ++ if (follow_pfnmap_start(&args)) +++>>>>>>> b17269a51cc7 (mm/access_process_vm: use the new follow_pfnmap API) + goto out_unmap; + +- if (!pte_same(pte, ptep_get(ptep))) { +- pte_unmap_unlock(ptep, ptl); ++ if ((prot != pgprot_val(args.pgprot)) || ++ (phys_addr != 
(args.pfn << PAGE_SHIFT)) || ++ (writable != args.writable)) { ++ follow_pfnmap_end(&args); + iounmap(maddr); +- + goto retry; + } + +* Unmerged path mm/memory.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/b1b46751.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/b1b46751.failed new file mode 100644 index 0000000000000..871b89f15e6a1 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/b1b46751.failed @@ -0,0 +1,209 @@ +mm: fix follow_pfnmap API lockdep assert + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Linus Torvalds +commit b1b46751671be5a426982f037a47ae05f37ff80b +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/b1b46751.failed + +The lockdep asserts for the new follow_pfnmap() API "knows" that a +pfnmap always has a vma->vm_file, since that's the only way to create +such a mapping. + +And that's actually true for all the normal cases. But not for the mmap +failure case, where the incomplete mapping is torn down and we have +cleared vma->vm_file because the failure occured before the file was +linked to the vma. + +So this codepath does actually need to check for vm_file being NULL. + + Reported-by: Jann Horn +Fixes: 6da8e9634bb7 ("mm: new follow_pfnmap API") + Cc: Peter Xu + Cc: Andrew Morton + Signed-off-by: Linus Torvalds +(cherry picked from commit b1b46751671be5a426982f037a47ae05f37ff80b) + Signed-off-by: Jonathan Maple + +# Conflicts: +# mm/memory.c +diff --cc mm/memory.c +index e2794e3b8919,3ccee51adfbb..000000000000 +--- a/mm/memory.c ++++ b/mm/memory.c +@@@ -5607,60 -6333,136 +5607,91 @@@ int __pmd_alloc(struct mm_struct *mm, p + } + #endif /* __PAGETABLE_PMD_FOLDED */ + +++<<<<<<< HEAD +++======= ++ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args, ++ spinlock_t *lock, pte_t *ptep, ++ pgprot_t pgprot, unsigned long pfn_base, ++ unsigned long addr_mask, bool writable, ++ bool special) ++ { ++ args->lock = lock; ++ args->ptep = ptep; ++ args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT); ++ args->pgprot = pgprot; ++ args->writable = writable; ++ args->special = special; ++ } ++ ++ static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma) ++ { ++ #ifdef CONFIG_LOCKDEP ++ struct file *file = vma->vm_file; ++ struct address_space *mapping = file ? file->f_mapping : NULL; ++ ++ if (mapping) ++ lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) || ++ lockdep_is_held(&vma->vm_mm->mmap_lock)); ++ else ++ lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock)); ++ #endif ++ } ++ +++>>>>>>> b1b46751671b (mm: fix follow_pfnmap API lockdep assert) + /** + - * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address + - * @args: Pointer to struct @follow_pfnmap_args + - * + - * The caller needs to setup args->vma and args->address to point to the + - * virtual address as the target of such lookup. On a successful return, + - * the results will be put into other output fields. + - * + - * After the caller finished using the fields, the caller must invoke + - * another follow_pfnmap_end() to proper releases the locks and resources + - * of such look up request. + - * + - * During the start() and end() calls, the results in @args will be valid + - * as proper locks will be held. After the end() is called, all the fields + - * in @follow_pfnmap_args will be invalid to be further accessed. 
Further + - * use of such information after end() may require proper synchronizations + - * by the caller with page table updates, otherwise it can create a + - * security bug. + + * follow_pte - look up PTE at a user virtual address + + * @mm: the mm_struct of the target address space + + * @address: user virtual address + + * @ptepp: location to store found PTE + + * @ptlp: location to store the lock for the PTE + * + - * If the PTE maps a refcounted page, callers are responsible to protect + - * against invalidation with MMU notifiers; otherwise access to the PFN at + - * a later point in time can trigger use-after-free. + + * On a successful return, the pointer to the PTE is stored in @ptepp; + + * the corresponding lock is taken and its location is stored in @ptlp. + + * The contents of the PTE are only stable until @ptlp is released; + + * any further use, if any, must be protected against invalidation + + * with MMU notifiers. + * + * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore + - * should be taken for read, and the mmap semaphore cannot be released + - * before the end() is invoked. + + * should be taken for read. + * + - * This function must not be used to modify PTE content. + + * KVM uses this function. While it is arguably less bad than ``follow_pfn``, + + * it is not a good general-purpose API. + * + - * Return: zero on success, negative otherwise. + + * Return: zero on success, -ve otherwise. + */ + -int follow_pfnmap_start(struct follow_pfnmap_args *args) + +int follow_pte(struct mm_struct *mm, unsigned long address, + + pte_t **ptepp, spinlock_t **ptlp) + { + - struct vm_area_struct *vma = args->vma; + - unsigned long address = args->address; + - struct mm_struct *mm = vma->vm_mm; + - spinlock_t *lock; + - pgd_t *pgdp; + - p4d_t *p4dp, p4d; + - pud_t *pudp, pud; + - pmd_t *pmdp, pmd; + - pte_t *ptep, pte; + - + - pfnmap_lockdep_assert(vma); + - + - if (unlikely(address < vma->vm_start || address >= vma->vm_end)) + - goto out; + + pgd_t *pgd; + + p4d_t *p4d; + + pud_t *pud; + + pmd_t *pmd; + + pte_t *ptep; + + - if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + - goto out; + -retry: + - pgdp = pgd_offset(mm, address); + - if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp))) + + pgd = pgd_offset(mm, address); + + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto out; + + - p4dp = p4d_offset(pgdp, address); + - p4d = READ_ONCE(*p4dp); + - if (p4d_none(p4d) || unlikely(p4d_bad(p4d))) + + p4d = p4d_offset(pgd, address); + + if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) + goto out; + + - pudp = pud_offset(p4dp, address); + - pud = READ_ONCE(*pudp); + - if (pud_none(pud)) + + pud = pud_offset(p4d, address); + + if (pud_none(*pud) || unlikely(pud_bad(*pud))) + goto out; + - if (pud_leaf(pud)) { + - lock = pud_lock(mm, pudp); + - if (!unlikely(pud_leaf(pud))) { + - spin_unlock(lock); + - goto retry; + - } + - pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud), + - pud_pfn(pud), PUD_MASK, pud_write(pud), + - pud_special(pud)); + - return 0; + - } + + - pmdp = pmd_offset(pudp, address); + - pmd = pmdp_get_lockless(pmdp); + - if (pmd_leaf(pmd)) { + - lock = pmd_lock(mm, pmdp); + - if (!unlikely(pmd_leaf(pmd))) { + - spin_unlock(lock); + - goto retry; + - } + - pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd), + - pmd_pfn(pmd), PMD_MASK, pmd_write(pmd), + - pmd_special(pmd)); + - return 0; + - } + + pmd = pmd_offset(pud, address); + + VM_BUG_ON(pmd_trans_huge(*pmd)); + + - ptep = pte_offset_map_lock(mm, pmdp, address, &lock); + + ptep = 
pte_offset_map_lock(mm, pmd, address, ptlp); + if (!ptep) + goto out; + - pte = ptep_get(ptep); + - if (!pte_present(pte)) + + if (!pte_present(ptep_get(ptep))) + goto unlock; + - pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte), + - pte_pfn(pte), PAGE_MASK, pte_write(pte), + - pte_special(pte)); + + *ptepp = ptep; + return 0; + unlock: + - pte_unmap_unlock(ptep, lock); + + pte_unmap_unlock(ptep, *ptlp); + out: + return -EINVAL; + } +* Unmerged path mm/memory.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/bd8c2d18.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/bd8c2d18.failed new file mode 100644 index 0000000000000..da24628b81a33 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/bd8c2d18.failed @@ -0,0 +1,76 @@ +s390/pci_mmio: use follow_pfnmap API + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Peter Xu +commit bd8c2d18bf5cccd8842d00b17d6f222beb98b1b3 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/bd8c2d18.failed + +Use the new API that can understand huge pfn mappings. + +Link: https://lkml.kernel.org/r/20240826204353.2228736-12-peterx@redhat.com + Signed-off-by: Peter Xu + Cc: Niklas Schnelle + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Vasily Gorbik + Cc: Alexander Gordeev + Cc: Christian Borntraeger + Cc: Sven Schnelle + Cc: Alex Williamson + Cc: Aneesh Kumar K.V + Cc: Borislav Petkov + Cc: Catalin Marinas + Cc: Dave Hansen + Cc: David Hildenbrand + Cc: Gavin Shan + Cc: Ingo Molnar + Cc: Jason Gunthorpe + Cc: Matthew Wilcox + Cc: Paolo Bonzini + Cc: Ryan Roberts + Cc: Sean Christopherson + Cc: Thomas Gleixner + Cc: Will Deacon + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit bd8c2d18bf5cccd8842d00b17d6f222beb98b1b3) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/s390/pci/pci_mmio.c +diff --cc arch/s390/pci/pci_mmio.c +index 588089332931,de5c0b389a3e..000000000000 +--- a/arch/s390/pci/pci_mmio.c ++++ b/arch/s390/pci/pci_mmio.c +@@@ -169,7 -168,9 +168,13 @@@ SYSCALL_DEFINE3(s390_pci_mmio_write, un + if (!(vma->vm_flags & VM_WRITE)) + goto out_unlock_mmap; + +++<<<<<<< HEAD + + ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl); +++======= ++ args.address = mmio_addr; ++ args.vma = vma; ++ ret = follow_pfnmap_start(&args); +++>>>>>>> bd8c2d18bf5c (s390/pci_mmio: use follow_pfnmap API) + if (ret) + goto out_unlock_mmap; + +@@@ -308,7 -308,9 +312,13 @@@ SYSCALL_DEFINE3(s390_pci_mmio_read, uns + if (!(vma->vm_flags & VM_WRITE)) + goto out_unlock_mmap; + +++<<<<<<< HEAD + + ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl); +++======= ++ args.vma = vma; ++ args.address = mmio_addr; ++ ret = follow_pfnmap_start(&args); +++>>>>>>> bd8c2d18bf5c (s390/pci_mmio: use follow_pfnmap API) + if (ret) + goto out_unlock_mmap; + +* Unmerged path arch/s390/pci/pci_mmio.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/c1d9dac0.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/c1d9dac0.failed new file mode 100644 index 0000000000000..ec6e80e4cf602 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/c1d9dac0.failed @@ -0,0 +1,157 @@ +vfio/pci: Align huge faults to order + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Alex Williamson +commit c1d9dac0db168198b6f63f460665256dedad9b6e +Empty-Commit: Cherry-Pick Conflicts during history rebuild. 
+Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/c1d9dac0.failed + +The vfio-pci huge_fault handler doesn't make any attempt to insert a +mapping containing the faulting address, it only inserts mappings if the +faulting address and resulting pfn are aligned. This works in a lot of +cases, particularly in conjunction with QEMU where DMA mappings linearly +fault the mmap. However, there are configurations where we don't get +that linear faulting and pages are faulted on-demand. + +The scenario reported in the bug below is such a case, where the physical +address width of the CPU is greater than that of the IOMMU, resulting in a +VM where guest firmware has mapped device MMIO beyond the address width of +the IOMMU. In this configuration, the MMIO is faulted on demand and +tracing indicates that occasionally the faults generate a VM_FAULT_OOM. +Given the use case, this results in a "error: kvm run failed Bad address", +killing the VM. + +The host is not under memory pressure in this test, therefore it's +suspected that VM_FAULT_OOM is actually the result of a NULL return from +__pte_offset_map_lock() in the get_locked_pte() path from insert_pfn(). +This suggests a potential race inserting a pte concurrent to a pmd, and +maybe indicates some deficiency in the mm layer properly handling such a +case. + +Nevertheless, Peter noted the inconsistency of vfio-pci's huge_fault +handler where our mapping granularity depends on the alignment of the +faulting address relative to the order rather than aligning the faulting +address to the order to more consistently insert huge mappings. This +change not only uses the page tables more consistently and efficiently, but +as any fault to an aligned page results in the same mapping, the race +condition suspected in the VM_FAULT_OOM is avoided. 
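Concretely, for a PMD-order fault (order = 9 with 4KB pages) the handler now rounds the faulting address down to the containing 2MB boundary, addr = vmf->address & ~((PAGE_SIZE << order) - 1), and only falls back to a smaller mapping when that aligned range would leave the VMA or the pfn is not equally aligned, which is exactly the check visible in the hunk below.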
+ + Reported-by: Adolfo +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220057 +Fixes: 09dfc8a5f2ce ("vfio/pci: Fallback huge faults for unaligned pfn") + Cc: stable@vger.kernel.org + Tested-by: Adolfo +Co-developed-by: Peter Xu + Signed-off-by: Peter Xu +Link: https://lore.kernel.org/r/20250502224035.3183451-1-alex.williamson@redhat.com + Signed-off-by: Alex Williamson +(cherry picked from commit c1d9dac0db168198b6f63f460665256dedad9b6e) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/vfio/pci/vfio_pci_core.c +diff --cc drivers/vfio/pci/vfio_pci_core.c +index ffda816e0119,6328c3a05bcd..000000000000 +--- a/drivers/vfio/pci/vfio_pci_core.c ++++ b/drivers/vfio/pci/vfio_pci_core.c +@@@ -1770,49 -1646,59 +1770,63 @@@ static vm_fault_t vfio_pci_mmap_fault(s + { + struct vm_area_struct *vma = vmf->vma; + struct vfio_pci_core_device *vdev = vma->vm_private_data; +++<<<<<<< HEAD + + struct vfio_pci_mmap_vma *mmap_vma; + + vm_fault_t ret = VM_FAULT_NOPAGE; +++======= ++ unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1); ++ unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; ++ unsigned long pfn = vma_to_pfn(vma) + pgoff; ++ vm_fault_t ret = VM_FAULT_SIGBUS; ++ ++ if (order && (addr < vma->vm_start || ++ addr + (PAGE_SIZE << order) > vma->vm_end || ++ pfn & ((1 << order) - 1))) { ++ ret = VM_FAULT_FALLBACK; ++ goto out; ++ } +++>>>>>>> c1d9dac0db16 (vfio/pci: Align huge faults to order) + + + mutex_lock(&vdev->vma_lock); + down_read(&vdev->memory_lock); + + - if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) + - goto out_unlock; + + /* + + * Memory region cannot be accessed if the low power feature is engaged + + * or memory access is disabled. + + */ + + if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) { + + ret = VM_FAULT_SIGBUS; + + goto up_out; + + } + + - switch (order) { + - case 0: + - ret = vmf_insert_pfn(vma, vmf->address, pfn); + - break; + -#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP + - case PMD_ORDER: + - ret = vmf_insert_pfn_pmd(vmf, + - __pfn_to_pfn_t(pfn, PFN_DEV), false); + - break; + -#endif + -#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP + - case PUD_ORDER: + - ret = vmf_insert_pfn_pud(vmf, + - __pfn_to_pfn_t(pfn, PFN_DEV), false); + - break; + -#endif + - default: + - ret = VM_FAULT_FALLBACK; + + /* + + * We populate the whole vma on fault, so we need to test whether + + * the vma has already been mapped, such as for concurrent faults + + * to the same vma. io_remap_pfn_range() will trigger a BUG_ON if + + * we ask it to fill the same range again. 
+ + */ + + list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { + + if (mmap_vma->vma == vma) + + goto up_out; + } + + -out_unlock: + - up_read(&vdev->memory_lock); + -out: + - dev_dbg_ratelimited(&vdev->pdev->dev, + - "%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n", + - __func__, order, + - vma->vm_pgoff >> + - (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT), + - pgoff, (unsigned int)ret); + + if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + + vma->vm_end - vma->vm_start, + + vma->vm_page_prot)) { + + ret = VM_FAULT_SIGBUS; + + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + + goto up_out; + + } + + - return ret; + -} + + if (__vfio_pci_add_vma(vdev, vma)) { + + ret = VM_FAULT_OOM; + + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + + } + + -static vm_fault_t vfio_pci_mmap_page_fault(struct vm_fault *vmf) + -{ + - return vfio_pci_mmap_huge_fault(vmf, 0); + +up_out: + + up_read(&vdev->memory_lock); + + mutex_unlock(&vdev->vma_lock); + + return ret; + } + + static const struct vm_operations_struct vfio_pci_mmap_ops = { +* Unmerged path drivers/vfio/pci/vfio_pci_core.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/c5541ba3.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/c5541ba3.failed new file mode 100644 index 0000000000000..efc4e67b46099 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/c5541ba3.failed @@ -0,0 +1,83 @@ +mm: follow_pte() improvements + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author David Hildenbrand +commit c5541ba378e3d36ea88bf5839d5b23e33e7d1627 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/c5541ba3.failed + +follow_pte() is now our main function to lookup PTEs in VM_PFNMAP/VM_IO +VMAs. Let's perform some more sanity checks to make this exported +function harder to abuse. + +Further, extend the doc a bit, it still focuses on the KVM use case with +MMU notifiers. Drop the KVM+follow_pfn() comment, follow_pfn() is no +more, and we have other users nowadays. + +Also extend the doc regarding refcounted pages and the interaction with +MMU notifiers. + +KVM is one example that uses MMU notifiers and can deal with refcounted +pages properly. VFIO is one example that doesn't use MMU notifiers, and +to prevent use-after-free, rejects refcounted pages: pfn_valid(pfn) && +!PageReserved(pfn_to_page(pfn)). Protection changes are less of a concern +for users like VFIO: the behavior is similar to longterm-pinning a page, +and getting the PTE protection changed afterwards. + +The primary concern with refcounted pages is use-after-free, which callers +should be aware of. + +Link: https://lkml.kernel.org/r/20240410155527.474777-4-david@redhat.com + Signed-off-by: David Hildenbrand + Cc: Alex Williamson + Cc: Christoph Hellwig + Cc: Fei Li + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Ingo Molnar + Cc: Paolo Bonzini + Cc: Sean Christopherson + Cc: Yonghua Huang + Signed-off-by: Andrew Morton +(cherry picked from commit c5541ba378e3d36ea88bf5839d5b23e33e7d1627) + Signed-off-by: Jonathan Maple + +# Conflicts: +# mm/memory.c +diff --cc mm/memory.c +index e2794e3b8919,36ba94eae853..000000000000 +--- a/mm/memory.c ++++ b/mm/memory.c +@@@ -5623,8 -5947,7 +5630,12 @@@ int __pmd_alloc(struct mm_struct *mm, p + * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore + * should be taken for read. 
+ * +++<<<<<<< HEAD + + * KVM uses this function. While it is arguably less bad than ``follow_pfn``, + + * it is not a good general-purpose API. +++======= ++ * This function must not be used to modify PTE content. +++>>>>>>> c5541ba378e3 (mm: follow_pte() improvements) + * + * Return: zero on success, -ve otherwise. + */ +@@@ -5637,6 -5961,13 +5648,16 @@@ int follow_pte(struct mm_struct *mm, un + pmd_t *pmd; + pte_t *ptep; + +++<<<<<<< HEAD +++======= ++ mmap_assert_locked(mm); ++ if (unlikely(address < vma->vm_start || address >= vma->vm_end)) ++ goto out; ++ ++ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) ++ goto out; ++ +++>>>>>>> c5541ba378e3 (mm: follow_pte() improvements) + pgd = pgd_offset(mm, address); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto out; +* Unmerged path mm/memory.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/cb10c28a.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/cb10c28a.failed new file mode 100644 index 0000000000000..db5b5c1060134 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/cb10c28a.failed @@ -0,0 +1,132 @@ +mm: remove follow_pfn + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Christoph Hellwig +commit cb10c28ac82c9b7a5e9b3b1dc7157036c20c36dd +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/cb10c28a.failed + +Remove follow_pfn now that the last user is gone. + +Link: https://lkml.kernel.org/r/20240324234542.2038726-3-hch@lst.de + Signed-off-by: Christoph Hellwig + Reviewed-by: David Hildenbrand + Cc: Andy Lutomirski + Cc: Dave Hansen + Cc: Fei Li + Cc: Ingo Molnar + Cc: Peter Zijlstra + Cc: Nathan Chancellor + Signed-off-by: Andrew Morton +(cherry picked from commit cb10c28ac82c9b7a5e9b3b1dc7157036c20c36dd) + Signed-off-by: Jonathan Maple + +# Conflicts: +# mm/nommu.c +diff --cc mm/nommu.c +index f3f6a7e97647,331d2f778695..000000000000 +--- a/mm/nommu.c ++++ b/mm/nommu.c +@@@ -110,29 -110,6 +110,32 @@@ unsigned int kobjsize(const void *objp + return page_size(page); + } + +++<<<<<<< HEAD + +/** + + * follow_pfn - look up PFN at a user virtual address + + * @vma: memory mapping + + * @address: user virtual address + + * @pfn: location to store found PFN + + * + + * Only IO mappings and raw PFN mappings are allowed. + + * + + * Returns zero and the pfn at @pfn on success, -ve otherwise. 
+ + */ + +int follow_pfn(struct vm_area_struct *vma, unsigned long address, + + unsigned long *pfn) + +{ + + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + + return -EINVAL; + + + + *pfn = address >> PAGE_SHIFT; + + return 0; + +} + +EXPORT_SYMBOL(follow_pfn); + + + +LIST_HEAD(vmap_area_list); + + +++======= +++>>>>>>> cb10c28ac82c (mm: remove follow_pfn) + void vfree(const void *addr) + { + kfree(addr); +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 196c481ec160..cd16e4cb2ce0 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -2429,8 +2429,6 @@ int + copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); + int follow_pte(struct mm_struct *mm, unsigned long address, + pte_t **ptepp, spinlock_t **ptlp); +-int follow_pfn(struct vm_area_struct *vma, unsigned long address, +- unsigned long *pfn); + int follow_phys(struct vm_area_struct *vma, unsigned long address, + unsigned int flags, unsigned long *prot, resource_size_t *phys); + int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, +diff --git a/mm/memory.c b/mm/memory.c +index e2794e3b8919..4498a39fb51d 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -5623,8 +5623,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) + * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore + * should be taken for read. + * +- * KVM uses this function. While it is arguably less bad than ``follow_pfn``, +- * it is not a good general-purpose API. ++ * KVM uses this function. While it is arguably less bad than the historic ++ * ``follow_pfn``, it is not a good general-purpose API. + * + * Return: zero on success, -ve otherwise. + */ +@@ -5666,38 +5666,6 @@ int follow_pte(struct mm_struct *mm, unsigned long address, + } + EXPORT_SYMBOL_GPL(follow_pte); + +-/** +- * follow_pfn - look up PFN at a user virtual address +- * @vma: memory mapping +- * @address: user virtual address +- * @pfn: location to store found PFN +- * +- * Only IO mappings and raw PFN mappings are allowed. +- * +- * This function does not allow the caller to read the permissions +- * of the PTE. Do not use it. +- * +- * Return: zero and the pfn at @pfn on success, -ve otherwise. +- */ +-int follow_pfn(struct vm_area_struct *vma, unsigned long address, +- unsigned long *pfn) +-{ +- int ret = -EINVAL; +- spinlock_t *ptl; +- pte_t *ptep; +- +- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) +- return ret; +- +- ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); +- if (ret) +- return ret; +- *pfn = pte_pfn(ptep_get(ptep)); +- pte_unmap_unlock(ptep, ptl); +- return 0; +-} +-EXPORT_SYMBOL(follow_pfn); +- + #ifdef CONFIG_HAVE_IOREMAP_PROT + int follow_phys(struct vm_area_struct *vma, + unsigned long address, unsigned int flags, +* Unmerged path mm/nommu.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/cbea8536.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/cbea8536.failed new file mode 100644 index 0000000000000..3341542fa241c --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/cbea8536.failed @@ -0,0 +1,78 @@ +mm/x86/pat: use the new follow_pfnmap API + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Peter Xu +commit cbea8536d933d546ceb1005bf9c04f9d01da8092 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/cbea8536.failed + +Use the new API that can understand huge pfn mappings. 
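As a rough model of why the new API can understand huge pfn mappings: follow_pfnmap reports one base pfn plus an address mask covering the whole mapping, and the pfn for a given address is derived from the offset under that mask, as pfnmap_args_setup() does later in this patch (and as vfio_iommu_type1 uses to compute the end pfn). A self-contained userspace sketch with hypothetical values, assuming 4 KiB pages and a 2 MiB PMD mapping:

#include <stdio.h>

#define PAGE_SHIFT 12UL                 /* 4 KiB pages assumed */
#define PMD_SHIFT  21UL                 /* 2 MiB PMD mapping assumed */

/* Same formula as pfnmap_args_setup(): base pfn plus the offset inside the mask. */
static unsigned long pfn_for_addr(unsigned long addr, unsigned long pfn_base,
                                  unsigned long addr_mask)
{
    return pfn_base + ((addr & ~addr_mask) >> PAGE_SHIFT);
}

int main(void)
{
    unsigned long addr_mask = ~((1UL << PMD_SHIFT) - 1); /* covers the whole PMD */
    unsigned long pfn_base  = 0x100000UL;                /* pfn of the PMD entry */
    unsigned long addr      = 0x40000000UL + 0x5000UL;   /* an address inside it */
    unsigned long pfn       = pfn_for_addr(addr, pfn_base, addr_mask);

    printf("pfn for 0x%lx = 0x%lx\n", addr, pfn);
    /* One past the last pfn of this mapping, as vfio_iommu_type1 computes it. */
    printf("end pfn = 0x%lx\n", (pfn | (~addr_mask >> PAGE_SHIFT)) + 1);
    return 0;
}

With that, a caller such as the follow_phys() replacement in this hunk only needs args.pfn and args.pgprot, regardless of whether the mapping is backed by a pte, pmd, or pud.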
+ +Link: https://lkml.kernel.org/r/20240826204353.2228736-13-peterx@redhat.com + Signed-off-by: Peter Xu + Cc: Thomas Gleixner + Cc: Ingo Molnar + Cc: Borislav Petkov + Cc: Dave Hansen + Cc: Alexander Gordeev + Cc: Alex Williamson + Cc: Aneesh Kumar K.V + Cc: Catalin Marinas + Cc: Christian Borntraeger + Cc: David Hildenbrand + Cc: Gavin Shan + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Jason Gunthorpe + Cc: Matthew Wilcox + Cc: Niklas Schnelle + Cc: Paolo Bonzini + Cc: Ryan Roberts + Cc: Sean Christopherson + Cc: Sven Schnelle + Cc: Vasily Gorbik + Cc: Will Deacon + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit cbea8536d933d546ceb1005bf9c04f9d01da8092) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/x86/mm/pat/memtype.c +diff --cc arch/x86/mm/pat/memtype.c +index 36b603d0cdde,f73b5ce270b3..000000000000 +--- a/arch/x86/mm/pat/memtype.c ++++ b/arch/x86/mm/pat/memtype.c +@@@ -947,6 -948,26 +947,29 @@@ static void free_pfn_range(u64 paddr, u + memtype_free(paddr, paddr + size); + } + +++<<<<<<< HEAD +++======= ++ static int follow_phys(struct vm_area_struct *vma, unsigned long *prot, ++ resource_size_t *phys) ++ { ++ struct follow_pfnmap_args args = { .vma = vma, .address = vma->vm_start }; ++ ++ if (follow_pfnmap_start(&args)) ++ return -EINVAL; ++ ++ /* Never return PFNs of anon folios in COW mappings. */ ++ if (!args.special) { ++ follow_pfnmap_end(&args); ++ return -EINVAL; ++ } ++ ++ *prot = pgprot_val(args.pgprot); ++ *phys = (resource_size_t)args.pfn << PAGE_SHIFT; ++ follow_pfnmap_end(&args); ++ return 0; ++ } ++ +++>>>>>>> cbea8536d933 (mm/x86/pat: use the new follow_pfnmap API) + static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) + { +* Unmerged path arch/x86/mm/pat/memtype.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/ef713ec3.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/ef713ec3.failed new file mode 100644 index 0000000000000..a094d940ee94c --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/ef713ec3.failed @@ -0,0 +1,118 @@ +mm: drop is_huge_zero_pud() + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Peter Xu +commit ef713ec3a566d3e5e011c5d6201eb661ebf94c1f +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/ef713ec3.failed + +It constantly returns false since 2017. One assertion is added in 2019 but +it should never have triggered, IOW it means what is checked should be +asserted instead. + +If it didn't exist for 7 years maybe it's good idea to remove it and only +add it when it comes. 
+ +Link: https://lkml.kernel.org/r/20240826204353.2228736-3-peterx@redhat.com + Signed-off-by: Peter Xu + Reviewed-by: Jason Gunthorpe + Acked-by: David Hildenbrand + Cc: Matthew Wilcox + Cc: Aneesh Kumar K.V + Cc: Alexander Gordeev + Cc: Alex Williamson + Cc: Borislav Petkov + Cc: Catalin Marinas + Cc: Christian Borntraeger + Cc: Dave Hansen + Cc: Gavin Shan + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Ingo Molnar + Cc: Niklas Schnelle + Cc: Paolo Bonzini + Cc: Ryan Roberts + Cc: Sean Christopherson + Cc: Sven Schnelle + Cc: Thomas Gleixner + Cc: Vasily Gorbik + Cc: Will Deacon + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit ef713ec3a566d3e5e011c5d6201eb661ebf94c1f) + Signed-off-by: Jonathan Maple + +# Conflicts: +# include/linux/huge_mm.h +# mm/huge_memory.c +diff --cc include/linux/huge_mm.h +index fc789c0ac85b,ffca706bac81..000000000000 +--- a/include/linux/huge_mm.h ++++ b/include/linux/huge_mm.h +@@@ -256,13 -433,8 +256,18 @@@ static inline bool is_huge_zero_pmd(pmd + return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd); + } + +++<<<<<<< HEAD + +static inline bool is_huge_zero_pud(pud_t pud) + +{ + + return false; + +} + + + +struct page *mm_get_huge_zero_page(struct mm_struct *mm); + +void mm_put_huge_zero_page(struct mm_struct *mm); +++======= ++ struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); ++ void mm_put_huge_zero_folio(struct mm_struct *mm); +++>>>>>>> ef713ec3a566 (mm: drop is_huge_zero_pud()) + + #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) + +@@@ -379,12 -573,7 +384,16 @@@ static inline bool is_huge_zero_pmd(pmd + return false; + } + +++<<<<<<< HEAD + +static inline bool is_huge_zero_pud(pud_t pud) + +{ + + return false; + +} + + + +static inline void mm_put_huge_zero_page(struct mm_struct *mm) +++======= ++ static inline void mm_put_huge_zero_folio(struct mm_struct *mm) +++>>>>>>> ef713ec3a566 (mm: drop is_huge_zero_pud()) + { + return; + } +diff --cc mm/huge_memory.c +index 20d9b3971dc8,a4a14b81e013..000000000000 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@@ -1236,17 -1703,8 +1234,22 @@@ int copy_huge_pud(struct mm_struct *dst + goto out_unlock; + + /* +++<<<<<<< HEAD + + * When page table lock is held, the huge zero pud should not be + + * under splitting since we don't split the page itself, only pud to + + * a page table. + + */ + + if (is_huge_zero_pud(pud)) { + + /* No huge zero pud yet */ + + } + + + + /* + + * TODO: once we support anonymous pages, use page_try_dup_anon_rmap() + + * and split if duplicating fails. +++======= ++ * TODO: once we support anonymous pages, use ++ * folio_try_dup_anon_rmap_*() and split if duplicating fails. +++>>>>>>> ef713ec3a566 (mm: drop is_huge_zero_pud()) + */ + pudp_set_wrprotect(src_mm, addr, src_pud); + pud = pud_mkold(pud_wrprotect(pud)); +* Unmerged path include/linux/huge_mm.h +* Unmerged path mm/huge_memory.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/f9e54c3a.failed b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/f9e54c3a.failed new file mode 100644 index 0000000000000..5c70b216c6943 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/f9e54c3a.failed @@ -0,0 +1,229 @@ +vfio/pci: implement huge_fault support + +jira LE-3557 +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 +commit-author Alex Williamson +commit f9e54c3a2f5b79ecc57c7bc7d0d3521e461a2101 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. 
Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/f9e54c3a.failed + +With the addition of pfnmap support in vmf_insert_pfn_{pmd,pud}() we can +take advantage of PMD and PUD faults to PCI BAR mmaps and create more +efficient mappings. PCI BARs are always a power of two and will typically +get at least PMD alignment without userspace even trying. Userspace +alignment for PUD mappings is also not too difficult. + +Consolidate faults through a single handler with a new wrapper for +standard single page faults. The pre-faulting behavior of commit +d71a989cf5d9 ("vfio/pci: Insert full vma on mmap'd MMIO fault") is removed +in this refactoring since huge_fault will cover the bulk of the faults and +results in more efficient page table usage. We also want to avoid that +pre-faulted single page mappings preempt huge page mappings. + +Link: https://lkml.kernel.org/r/20240826204353.2228736-20-peterx@redhat.com + Signed-off-by: Alex Williamson + Signed-off-by: Peter Xu + Cc: Alexander Gordeev + Cc: Aneesh Kumar K.V + Cc: Borislav Petkov + Cc: Catalin Marinas + Cc: Christian Borntraeger + Cc: Dave Hansen + Cc: David Hildenbrand + Cc: Gavin Shan + Cc: Gerald Schaefer + Cc: Heiko Carstens + Cc: Ingo Molnar + Cc: Jason Gunthorpe + Cc: Matthew Wilcox + Cc: Niklas Schnelle + Cc: Paolo Bonzini + Cc: Ryan Roberts + Cc: Sean Christopherson + Cc: Sven Schnelle + Cc: Thomas Gleixner + Cc: Vasily Gorbik + Cc: Will Deacon + Cc: Zi Yan + Signed-off-by: Andrew Morton +(cherry picked from commit f9e54c3a2f5b79ecc57c7bc7d0d3521e461a2101) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/vfio/pci/vfio_pci_core.c +diff --cc drivers/vfio/pci/vfio_pci_core.c +index ffda816e0119,2d7478e9a62d..000000000000 +--- a/drivers/vfio/pci/vfio_pci_core.c ++++ b/drivers/vfio/pci/vfio_pci_core.c +@@@ -1725,100 -1646,82 +1726,161 @@@ void vfio_pci_memory_unlock_and_restore + up_write(&vdev->memory_lock); + } + + -static unsigned long vma_to_pfn(struct vm_area_struct *vma) + +/* Caller holds vma_lock */ + +static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev, + + struct vm_area_struct *vma) + { + - struct vfio_pci_core_device *vdev = vma->vm_private_data; + - int index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + - u64 pgoff; + + struct vfio_pci_mmap_vma *mmap_vma; + + - pgoff = vma->vm_pgoff & + - ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + + mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT); + + if (!mmap_vma) + + return -ENOMEM; + + + + mmap_vma->vma = vma; + + list_add(&mmap_vma->vma_next, &vdev->vma_list); + + + + return 0; + +} + + + +/* + + * Zap mmaps on open so that we can fault them in on access and therefore + + * our vma_list only tracks mappings accessed since last zap. 
+ + */ + +static void vfio_pci_mmap_open(struct vm_area_struct *vma) + +{ + + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + +} + + + +static void vfio_pci_mmap_close(struct vm_area_struct *vma) + +{ + + struct vfio_pci_core_device *vdev = vma->vm_private_data; + + struct vfio_pci_mmap_vma *mmap_vma; + + - return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff; + + mutex_lock(&vdev->vma_lock); + + list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { + + if (mmap_vma->vma == vma) { + + list_del(&mmap_vma->vma_next); + + kfree(mmap_vma); + + break; + + } + + } + + mutex_unlock(&vdev->vma_lock); + } + +- static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) ++ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf, ++ unsigned int order) + { + struct vm_area_struct *vma = vmf->vma; + struct vfio_pci_core_device *vdev = vma->vm_private_data; +++<<<<<<< HEAD + + struct vfio_pci_mmap_vma *mmap_vma; + + vm_fault_t ret = VM_FAULT_NOPAGE; +++======= ++ unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff; ++ vm_fault_t ret = VM_FAULT_SIGBUS; ++ ++ if (order && (vmf->address & ((PAGE_SIZE << order) - 1) || ++ vmf->address + (PAGE_SIZE << order) > vma->vm_end)) { ++ ret = VM_FAULT_FALLBACK; ++ goto out; ++ } ++ ++ pfn = vma_to_pfn(vma); +++>>>>>>> f9e54c3a2f5b (vfio/pci: implement huge_fault support) + + + mutex_lock(&vdev->vma_lock); + down_read(&vdev->memory_lock); + +++<<<<<<< HEAD + + /* + + * Memory region cannot be accessed if the low power feature is engaged + + * or memory access is disabled. + + */ + + if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) { + + ret = VM_FAULT_SIGBUS; + + goto up_out; + + } + + + + /* + + * We populate the whole vma on fault, so we need to test whether + + * the vma has already been mapped, such as for concurrent faults + + * to the same vma. io_remap_pfn_range() will trigger a BUG_ON if + + * we ask it to fill the same range again. 
+ + */ + + list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { + + if (mmap_vma->vma == vma) + + goto up_out; + + } +++======= ++ if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) ++ goto out_unlock; ++ ++ switch (order) { ++ case 0: ++ ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff); ++ break; ++ #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP ++ case PMD_ORDER: ++ ret = vmf_insert_pfn_pmd(vmf, __pfn_to_pfn_t(pfn + pgoff, ++ PFN_DEV), false); ++ break; ++ #endif ++ #ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP ++ case PUD_ORDER: ++ ret = vmf_insert_pfn_pud(vmf, __pfn_to_pfn_t(pfn + pgoff, ++ PFN_DEV), false); ++ break; ++ #endif ++ default: ++ ret = VM_FAULT_FALLBACK; ++ } ++ ++ out_unlock: ++ up_read(&vdev->memory_lock); ++ out: ++ dev_dbg_ratelimited(&vdev->pdev->dev, ++ "%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n", ++ __func__, order, ++ vma->vm_pgoff >> ++ (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT), ++ pgoff, (unsigned int)ret); +++>>>>>>> f9e54c3a2f5b (vfio/pci: implement huge_fault support) + + + + if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + + vma->vm_end - vma->vm_start, + + vma->vm_page_prot)) { + + ret = VM_FAULT_SIGBUS; + + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + + goto up_out; + + } + + + + if (__vfio_pci_add_vma(vdev, vma)) { + + ret = VM_FAULT_OOM; + + zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + + } + + +up_out: + + up_read(&vdev->memory_lock); + + mutex_unlock(&vdev->vma_lock); + return ret; + } + ++ static vm_fault_t vfio_pci_mmap_page_fault(struct vm_fault *vmf) ++ { ++ return vfio_pci_mmap_huge_fault(vmf, 0); ++ } ++ + static const struct vm_operations_struct vfio_pci_mmap_ops = { +++<<<<<<< HEAD + + .open = vfio_pci_mmap_open, + + .close = vfio_pci_mmap_close, + + .fault = vfio_pci_mmap_fault, +++======= ++ .fault = vfio_pci_mmap_page_fault, ++ #ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP ++ .huge_fault = vfio_pci_mmap_huge_fault, ++ #endif +++>>>>>>> f9e54c3a2f5b (vfio/pci: implement huge_fault support) + }; + + int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma) +* Unmerged path drivers/vfio/pci/vfio_pci_core.c diff --git a/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/rebuild.details.txt b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/rebuild.details.txt new file mode 100644 index 0000000000000..5dddd8579777c --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/rebuild.details.txt @@ -0,0 +1,40 @@ +Rebuild_History BUILDABLE +Rebuilding Kernel from rpm changelog with Fuzz Limit: 87.50% +Number of commits in upstream range v5.14~1..kernel-mainline: 309912 +Number of commits in rpm: 41 +Number of commits matched with upstream: 39 (95.12%) +Number of commits in upstream but not in rpm: 309873 +Number of commits NOT found in upstream: 2 (4.88%) + +Rebuilding Kernel on Branch rocky9_6_rebuild_kernel-5.14.0-570.26.1.el9_6 for kernel-5.14.0-570.26.1.el9_6 +Clean Cherry Picks: 14 (35.90%) +Empty Cherry Picks: 22 (56.41%) +_______________________________ + +__EMPTY COMMITS__________________________ +6857be5fecaebd9773ff27b6d29b6fff3b1abbce mm: introduce ARCH_SUPPORTS_HUGE_PFNMAP and special bits to pmd/pud +ef713ec3a566d3e5e011c5d6201eb661ebf94c1f mm: drop is_huge_zero_pud() +10d83d7781a8a6ff02bafd172c1ab183b27f8d5a mm/pagewalk: check pfnmap for folio_walk_start() +cb10c28ac82c9b7a5e9b3b1dc7157036c20c36dd mm: remove follow_pfn +6da8e9634bb7e3fdad9ae0e4db873a05036c4343 mm: new follow_pfnmap API +b1b46751671be5a426982f037a47ae05f37ff80b mm: fix follow_pfnmap API 
lockdep assert +5b34b76cb0cd8a21dee5c7677eae98480b0d05cc mm: move follow_phys to arch/x86/mm/pat/memtype.c +29ae7d96d166fa08c7232daf8a314ef5ba1efd20 mm: pass VMA instead of MM to follow_pte() +5731aacd54a883dd2c1a5e8c85e1fe78fc728dc7 KVM: use follow_pfnmap API +bd8c2d18bf5cccd8842d00b17d6f222beb98b1b3 s390/pci_mmio: use follow_pfnmap API +cbea8536d933d546ceb1005bf9c04f9d01da8092 mm/x86/pat: use the new follow_pfnmap API +a77f9489f1d7873a56e1d6640cc0c4865f64176b vfio: use the new follow_pfnmap API +b17269a51cc7f046a6f2cf9a6c314a0de885e5a5 mm/access_process_vm: use the new follow_pfnmap API +c5541ba378e3d36ea88bf5839d5b23e33e7d1627 mm: follow_pte() improvements +b0a1c0d0edcd75a0f8ec5fd19dbd64b8d097f534 mm: remove follow_pte() +75182022a0439788415b2dd1db3086e07aa506f7 mm/x86: support large pfn mappings +3e509c9b03f9abc7804c80bed266a6cc4286a5a8 mm/arm64: support large pfn mappings +f9e54c3a2f5b79ecc57c7bc7d0d3521e461a2101 vfio/pci: implement huge_fault support +09dfc8a5f2ce897005a94bf66cca4f91e4e03700 vfio/pci: Fallback huge faults for unaligned pfn +62fb8adc43afad5fa1c9cadc6f3a8e9fb72af194 mm: Provide address mask in struct follow_pfnmap_args +0fd06844de5d063cb384384e06a11ec7141a35d5 vfio/type1: Use mapping page mask for pfnmaps +c1d9dac0db168198b6f63f460665256dedad9b6e vfio/pci: Align huge faults to order + +__CHANGES NOT IN UPSTREAM________________ +Porting to Rocky Linux 9, debranding and Rocky branding' +Ensure aarch64 kernel is not compressed' diff --git a/configs/kernel-5.14.0-aarch64-64k-debug.config b/configs/kernel-5.14.0-aarch64-64k-debug.config index 787b193b8bca9..227706bfe5415 100644 --- a/configs/kernel-5.14.0-aarch64-64k-debug.config +++ b/configs/kernel-5.14.0-aarch64-64k-debug.config @@ -1076,6 +1076,8 @@ CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set +CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y +CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y diff --git a/configs/kernel-5.14.0-aarch64-64k.config b/configs/kernel-5.14.0-aarch64-64k.config index 39c43faf4a1d1..8974bca2568f7 100644 --- a/configs/kernel-5.14.0-aarch64-64k.config +++ b/configs/kernel-5.14.0-aarch64-64k.config @@ -1072,6 +1072,8 @@ CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set +CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y +CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y diff --git a/configs/kernel-5.14.0-aarch64-debug.config b/configs/kernel-5.14.0-aarch64-debug.config index 8b374663a4658..6f63b063221ee 100644 --- a/configs/kernel-5.14.0-aarch64-debug.config +++ b/configs/kernel-5.14.0-aarch64-debug.config @@ -1079,6 +1079,8 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set CONFIG_THP_SWAP=y # CONFIG_READ_ONLY_THP_FOR_FS is not set +CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y +CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y diff --git a/configs/kernel-5.14.0-aarch64.config b/configs/kernel-5.14.0-aarch64.config index 4403dc2c2a26e..ab69d33420388 100644 --- a/configs/kernel-5.14.0-aarch64.config +++ b/configs/kernel-5.14.0-aarch64.config @@ -1075,6 +1075,8 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # 
CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set CONFIG_THP_SWAP=y # CONFIG_READ_ONLY_THP_FOR_FS is not set +CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y +CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y diff --git a/configs/kernel-5.14.0-x86_64-debug.config b/configs/kernel-5.14.0-x86_64-debug.config index 93ef181632d25..3403e9c20fb5b 100644 --- a/configs/kernel-5.14.0-x86_64-debug.config +++ b/configs/kernel-5.14.0-x86_64-debug.config @@ -1138,6 +1138,9 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set CONFIG_THP_SWAP=y # CONFIG_READ_ONLY_THP_FOR_FS is not set +CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y +CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y +CONFIG_ARCH_SUPPORTS_PUD_PFNMAP=y CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y diff --git a/configs/kernel-5.14.0-x86_64.config b/configs/kernel-5.14.0-x86_64.config index 8f5eccfacea09..33d762cce77bd 100644 --- a/configs/kernel-5.14.0-x86_64.config +++ b/configs/kernel-5.14.0-x86_64.config @@ -1133,6 +1133,9 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set CONFIG_THP_SWAP=y # CONFIG_READ_ONLY_THP_FOR_FS is not set +CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y +CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y +CONFIG_ARCH_SUPPORTS_PUD_PFNMAP=y CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 36442eeffb828..73aa4347a2be4 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -600,6 +600,9 @@ static bool turbo_is_disabled(void) { u64 misc_en; + if (!cpu_feature_enabled(X86_FEATURE_IDA)) + return true; + rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index ffda816e01193..c9eaba2276365 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -57,11 +58,6 @@ struct vfio_pci_vf_token { int users; }; -struct vfio_pci_mmap_vma { - struct vm_area_struct *vma; - struct list_head vma_next; -}; - static inline bool vfio_vga_disabled(void) { #ifdef CONFIG_VFIO_PCI_VGA @@ -1610,100 +1606,20 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu } EXPORT_SYMBOL_GPL(vfio_pci_core_write); -/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */ -static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try) +static void vfio_pci_zap_bars(struct vfio_pci_core_device *vdev) { - struct vfio_pci_mmap_vma *mmap_vma, *tmp; - - /* - * Lock ordering: - * vma_lock is nested under mmap_lock for vm_ops callback paths. - * The memory_lock semaphore is used by both code paths calling - * into this function to zap vmas and the vm_ops.fault callback - * to protect the memory enable state of the device. - * - * When zapping vmas we need to maintain the mmap_lock => vma_lock - * ordering, which requires using vma_lock to walk vma_list to - * acquire an mm, then dropping vma_lock to get the mmap_lock and - * reacquiring vma_lock. This logic is derived from similar - * requirements in uverbs_user_mmap_disassociate(). - * - * mmap_lock must always be the top-level lock when it is taken. 
- * Therefore we can only hold the memory_lock write lock when - * vma_list is empty, as we'd need to take mmap_lock to clear - * entries. vma_list can only be guaranteed empty when holding - * vma_lock, thus memory_lock is nested under vma_lock. - * - * This enables the vm_ops.fault callback to acquire vma_lock, - * followed by memory_lock read lock, while already holding - * mmap_lock without risk of deadlock. - */ - while (1) { - struct mm_struct *mm = NULL; - - if (try) { - if (!mutex_trylock(&vdev->vma_lock)) - return 0; - } else { - mutex_lock(&vdev->vma_lock); - } - while (!list_empty(&vdev->vma_list)) { - mmap_vma = list_first_entry(&vdev->vma_list, - struct vfio_pci_mmap_vma, - vma_next); - mm = mmap_vma->vma->vm_mm; - if (mmget_not_zero(mm)) - break; + struct vfio_device *core_vdev = &vdev->vdev; + loff_t start = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX); + loff_t end = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX); + loff_t len = end - start; - list_del(&mmap_vma->vma_next); - kfree(mmap_vma); - mm = NULL; - } - if (!mm) - return 1; - mutex_unlock(&vdev->vma_lock); - - if (try) { - if (!mmap_read_trylock(mm)) { - mmput(mm); - return 0; - } - } else { - mmap_read_lock(mm); - } - if (try) { - if (!mutex_trylock(&vdev->vma_lock)) { - mmap_read_unlock(mm); - mmput(mm); - return 0; - } - } else { - mutex_lock(&vdev->vma_lock); - } - list_for_each_entry_safe(mmap_vma, tmp, - &vdev->vma_list, vma_next) { - struct vm_area_struct *vma = mmap_vma->vma; - - if (vma->vm_mm != mm) - continue; - - list_del(&mmap_vma->vma_next); - kfree(mmap_vma); - - zap_vma_ptes(vma, vma->vm_start, - vma->vm_end - vma->vm_start); - } - mutex_unlock(&vdev->vma_lock); - mmap_read_unlock(mm); - mmput(mm); - } + unmap_mapping_range(core_vdev->inode->i_mapping, start, len, true); } void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev) { - vfio_pci_zap_and_vma_lock(vdev, false); down_write(&vdev->memory_lock); - mutex_unlock(&vdev->vma_lock); + vfio_pci_zap_bars(vdev); } u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev) @@ -1725,100 +1641,83 @@ void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, u16 c up_write(&vdev->memory_lock); } -/* Caller holds vma_lock */ -static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev, - struct vm_area_struct *vma) -{ - struct vfio_pci_mmap_vma *mmap_vma; - - mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT); - if (!mmap_vma) - return -ENOMEM; - - mmap_vma->vma = vma; - list_add(&mmap_vma->vma_next, &vdev->vma_list); - - return 0; -} - -/* - * Zap mmaps on open so that we can fault them in on access and therefore - * our vma_list only tracks mappings accessed since last zap. 
- */ -static void vfio_pci_mmap_open(struct vm_area_struct *vma) -{ - zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); -} - -static void vfio_pci_mmap_close(struct vm_area_struct *vma) +static unsigned long vma_to_pfn(struct vm_area_struct *vma) { struct vfio_pci_core_device *vdev = vma->vm_private_data; - struct vfio_pci_mmap_vma *mmap_vma; + int index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + u64 pgoff; - mutex_lock(&vdev->vma_lock); - list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { - if (mmap_vma->vma == vma) { - list_del(&mmap_vma->vma_next); - kfree(mmap_vma); - break; - } - } - mutex_unlock(&vdev->vma_lock); + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + + return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff; } -static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) +static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf, + unsigned int order) { struct vm_area_struct *vma = vmf->vma; struct vfio_pci_core_device *vdev = vma->vm_private_data; - struct vfio_pci_mmap_vma *mmap_vma; - vm_fault_t ret = VM_FAULT_NOPAGE; - - mutex_lock(&vdev->vma_lock); - down_read(&vdev->memory_lock); - - /* - * Memory region cannot be accessed if the low power feature is engaged - * or memory access is disabled. - */ - if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) { - ret = VM_FAULT_SIGBUS; - goto up_out; + unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1); + unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; + unsigned long pfn = vma_to_pfn(vma) + pgoff; + vm_fault_t ret = VM_FAULT_SIGBUS; + + if (order && (addr < vma->vm_start || + addr + (PAGE_SIZE << order) > vma->vm_end || + pfn & ((1 << order) - 1))) { + ret = VM_FAULT_FALLBACK; + goto out; } - /* - * We populate the whole vma on fault, so we need to test whether - * the vma has already been mapped, such as for concurrent faults - * to the same vma. io_remap_pfn_range() will trigger a BUG_ON if - * we ask it to fill the same range again. 
- */ - list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { - if (mmap_vma->vma == vma) - goto up_out; - } + down_read(&vdev->memory_lock); - if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) { - ret = VM_FAULT_SIGBUS; - zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); - goto up_out; - } + if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) + goto out_unlock; - if (__vfio_pci_add_vma(vdev, vma)) { - ret = VM_FAULT_OOM; - zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + switch (order) { + case 0: + ret = vmf_insert_pfn(vma, vmf->address, pfn); + break; +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP + case PMD_ORDER: + ret = vmf_insert_pfn_pmd(vmf, + __pfn_to_pfn_t(pfn, PFN_DEV), false); + break; +#endif +#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP + case PUD_ORDER: + ret = vmf_insert_pfn_pud(vmf, + __pfn_to_pfn_t(pfn, PFN_DEV), false); + break; +#endif + default: + ret = VM_FAULT_FALLBACK; } -up_out: +out_unlock: up_read(&vdev->memory_lock); - mutex_unlock(&vdev->vma_lock); +out: + dev_dbg_ratelimited(&vdev->pdev->dev, + "%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n", + __func__, order, + vma->vm_pgoff >> + (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT), + pgoff, (unsigned int)ret); + return ret; } +static vm_fault_t vfio_pci_mmap_page_fault(struct vm_fault *vmf) +{ + return vfio_pci_mmap_huge_fault(vmf, 0); +} + static const struct vm_operations_struct vfio_pci_mmap_ops = { - .open = vfio_pci_mmap_open, - .close = vfio_pci_mmap_close, - .fault = vfio_pci_mmap_fault, + .fault = vfio_pci_mmap_page_fault, +#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP + .huge_fault = vfio_pci_mmap_huge_fault, +#endif }; int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma) @@ -1880,11 +1779,12 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma vma->vm_private_data = vdev; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff; + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); /* - * See remap_pfn_range(), called from vfio_pci_fault() but we can't - * change vm_flags within the fault handler. Set them now. + * Set vm_flags now, they should not be changed in the fault handler. + * We want the same flags and page protection (decrypted above) as + * io_remap_pfn_range() would set. * * VM_ALLOW_ANY_UNCACHED: The VMA flag is implemented for ARM64, * allowing KVM stage 2 device mapping attributes to use Normal-NC @@ -2202,8 +2102,6 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev) mutex_init(&vdev->ioeventfds_lock); INIT_LIST_HEAD(&vdev->dummy_resources_list); INIT_LIST_HEAD(&vdev->ioeventfds_list); - mutex_init(&vdev->vma_lock); - INIT_LIST_HEAD(&vdev->vma_list); INIT_LIST_HEAD(&vdev->sriov_pfs_item); init_rwsem(&vdev->memory_lock); xa_init(&vdev->ctx); @@ -2219,7 +2117,6 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev) mutex_destroy(&vdev->igate); mutex_destroy(&vdev->ioeventfds_lock); - mutex_destroy(&vdev->vma_lock); kfree(vdev->region); kfree(vdev->pm_save); } @@ -2497,26 +2394,15 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set) return ret; } -/* - * We need to get memory_lock for each device, but devices can share mmap_lock, - * therefore we need to zap and hold the vma_lock for each device, and only then - * get each memory_lock. 
- */ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, struct vfio_pci_group_info *groups, struct iommufd_ctx *iommufd_ctx) { - struct vfio_pci_core_device *cur_mem; - struct vfio_pci_core_device *cur_vma; - struct vfio_pci_core_device *cur; + struct vfio_pci_core_device *vdev; struct pci_dev *pdev; - bool is_mem = true; int ret; mutex_lock(&dev_set->lock); - cur_mem = list_first_entry(&dev_set->device_list, - struct vfio_pci_core_device, - vdev.dev_set_list); pdev = vfio_pci_dev_set_resettable(dev_set); if (!pdev) { @@ -2533,7 +2419,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, if (ret) goto err_unlock; - list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) { + list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) { bool owned; /* @@ -2557,38 +2443,38 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, * Otherwise, reset is not allowed. */ if (iommufd_ctx) { - int devid = vfio_iommufd_get_dev_id(&cur_vma->vdev, + int devid = vfio_iommufd_get_dev_id(&vdev->vdev, iommufd_ctx); owned = (devid > 0 || devid == -ENOENT); } else { - owned = vfio_dev_in_groups(&cur_vma->vdev, groups); + owned = vfio_dev_in_groups(&vdev->vdev, groups); } if (!owned) { ret = -EINVAL; - goto err_undo; + break; } /* - * Locking multiple devices is prone to deadlock, runaway and - * unwind if we hit contention. + * Take the memory write lock for each device and zap BAR + * mappings to prevent the user accessing the device while in + * reset. Locking multiple devices is prone to deadlock, + * runaway and unwind if we hit contention. */ - if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) { + if (!down_write_trylock(&vdev->memory_lock)) { ret = -EBUSY; - goto err_undo; + break; } + + vfio_pci_zap_bars(vdev); } - cur_vma = NULL; - list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) { - if (!down_write_trylock(&cur_mem->memory_lock)) { - ret = -EBUSY; - goto err_undo; - } - mutex_unlock(&cur_mem->vma_lock); + if (!list_entry_is_head(vdev, + &dev_set->device_list, vdev.dev_set_list)) { + vdev = list_prev_entry(vdev, vdev.dev_set_list); + goto err_undo; } - cur_mem = NULL; /* * The pci_reset_bus() will reset all the devices in the bus. @@ -2599,25 +2485,22 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, * cause the PCI config space reset without restoring the original * state (saved locally in 'vdev->pm_save'). 
*/ - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) - vfio_pci_set_power_state(cur, PCI_D0); + list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) + vfio_pci_set_power_state(vdev, PCI_D0); ret = pci_reset_bus(pdev); + vdev = list_last_entry(&dev_set->device_list, + struct vfio_pci_core_device, vdev.dev_set_list); + err_undo: - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) { - if (cur == cur_mem) - is_mem = false; - if (cur == cur_vma) - break; - if (is_mem) - up_write(&cur->memory_lock); - else - mutex_unlock(&cur->vma_lock); - } + list_for_each_entry_from_reverse(vdev, &dev_set->device_list, + vdev.dev_set_list) + up_write(&vdev->memory_lock); + + list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) + pm_runtime_put(&vdev->pdev->dev); - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) - pm_runtime_put(&cur->pdev->dev); err_unlock: mutex_unlock(&dev_set->lock); return ret; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 6c6586af79532..f8b8f3bcc7803 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -105,9 +105,9 @@ struct vfio_dma { struct vfio_batch { struct page **pages; /* for pin_user_pages_remote */ struct page *fallback_page; /* if pages alloc fails */ - int capacity; /* length of pages array */ - int size; /* of batch currently */ - int offset; /* of next entry in pages */ + unsigned int capacity; /* length of pages array */ + unsigned int size; /* of batch currently */ + unsigned int offset; /* of next entry in pages */ }; struct vfio_iommu_group { @@ -474,12 +474,12 @@ static int put_pfn(unsigned long pfn, int prot) #define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *)) -static void vfio_batch_init(struct vfio_batch *batch) +static void __vfio_batch_init(struct vfio_batch *batch, bool single) { batch->size = 0; batch->offset = 0; - if (unlikely(disable_hugepages)) + if (single || unlikely(disable_hugepages)) goto fallback; batch->pages = (struct page **) __get_free_page(GFP_KERNEL); @@ -494,6 +494,16 @@ static void vfio_batch_init(struct vfio_batch *batch) batch->capacity = 1; } +static void vfio_batch_init(struct vfio_batch *batch) +{ + __vfio_batch_init(batch, false); +} + +static void vfio_batch_init_single(struct vfio_batch *batch) +{ + __vfio_batch_init(batch, true); +} + static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma) { while (batch->size) { @@ -513,14 +523,12 @@ static void vfio_batch_fini(struct vfio_batch *batch) static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, unsigned long vaddr, unsigned long *pfn, - bool write_fault) + unsigned long *addr_mask, bool write_fault) { - pte_t *ptep; - pte_t pte; - spinlock_t *ptl; + struct follow_pfnmap_args args = { .vma = vma, .address = vaddr }; int ret; - ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); + ret = follow_pfnmap_start(&args); if (ret) { bool unlocked = false; @@ -534,43 +542,51 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, if (ret) return ret; - ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); + ret = follow_pfnmap_start(&args); if (ret) return ret; } - pte = ptep_get(ptep); - - if (write_fault && !pte_write(pte)) + if (write_fault && !args.writable) { ret = -EFAULT; - else - *pfn = pte_pfn(pte); + } else { + *pfn = args.pfn; + *addr_mask = args.addr_mask; + } - pte_unmap_unlock(ptep, ptl); + follow_pfnmap_end(&args); return ret; } /* * Returns the positive 
number of pfns successfully obtained or a negative - * error code. + * error code. The initial pfn is stored in the pfn arg. For page-backed + * pfns, the provided batch is also updated to indicate the filled pages and + * initial offset. For VM_PFNMAP pfns, only the returned number of pfns and + * returned initial pfn are provided; subsequent pfns are contiguous. */ -static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, - long npages, int prot, unsigned long *pfn, - struct page **pages) +static long vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, + unsigned long npages, int prot, unsigned long *pfn, + struct vfio_batch *batch) { + unsigned long pin_pages = min_t(unsigned long, npages, batch->capacity); struct vm_area_struct *vma; unsigned int flags = 0; - int ret; + long ret; if (prot & IOMMU_WRITE) flags |= FOLL_WRITE; mmap_read_lock(mm); - ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM, - pages, NULL); + ret = pin_user_pages_remote(mm, vaddr, pin_pages, flags | FOLL_LONGTERM, + batch->pages, NULL); if (ret > 0) { - *pfn = page_to_pfn(pages[0]); + *pfn = page_to_pfn(batch->pages[0]); + batch->size = ret; + batch->offset = 0; goto done; + } else if (!ret) { + ret = -EFAULT; } vaddr = untagged_addr_remote(mm, vaddr); @@ -579,15 +595,22 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, vma = vma_lookup(mm, vaddr); if (vma && vma->vm_flags & VM_PFNMAP) { - ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE); + unsigned long addr_mask; + + ret = follow_fault_pfn(vma, mm, vaddr, pfn, &addr_mask, + prot & IOMMU_WRITE); if (ret == -EAGAIN) goto retry; if (!ret) { - if (is_invalid_reserved_pfn(*pfn)) - ret = 1; - else + if (is_invalid_reserved_pfn(*pfn)) { + unsigned long epfn; + + epfn = (*pfn | (~addr_mask >> PAGE_SHIFT)) + 1; + ret = min_t(long, npages, epfn - *pfn); + } else { ret = -EFAULT; + } } } done: @@ -601,7 +624,7 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, * first page and all consecutive pages with the same locking. */ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, - long npage, unsigned long *pfn_base, + unsigned long npage, unsigned long *pfn_base, unsigned long limit, struct vfio_batch *batch) { unsigned long pfn; @@ -623,32 +646,42 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, *pfn_base = 0; } + if (unlikely(disable_hugepages)) + npage = 1; + while (npage) { if (!batch->size) { /* Empty batch, so refill it. */ - long req_pages = min_t(long, npage, batch->capacity); - - ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot, - &pfn, batch->pages); + ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot, + &pfn, batch); if (ret < 0) goto unpin_out; - batch->size = ret; - batch->offset = 0; - if (!*pfn_base) { *pfn_base = pfn; rsvd = is_invalid_reserved_pfn(*pfn_base); } + + /* Handle pfnmap */ + if (!batch->size) { + if (pfn != *pfn_base + pinned || !rsvd) + goto out; + + pinned += ret; + npage -= ret; + vaddr += (PAGE_SIZE * ret); + iova += (PAGE_SIZE * ret); + continue; + } } /* - * pfn is preset for the first iteration of this inner loop and - * updated at the end to handle a VM_PFNMAP pfn. In that case, - * batch->pages isn't valid (there's no struct page), so allow - * batch->pages to be touched only when there's more than one - * pfn to check, which guarantees the pfns are from a - * !VM_PFNMAP vma. 
+ * pfn is preset for the first iteration of this inner loop + * due to the fact that vaddr_get_pfns() needs to provide the + * initial pfn for pfnmaps. Therefore to reduce redundancy, + * the next pfn is fetched at the end of the loop. + * A PageReserved() page could still qualify as page backed + * and rsvd here, and therefore continues to use the batch. */ while (true) { if (pfn != *pfn_base + pinned || @@ -683,21 +716,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, pfn = page_to_pfn(batch->pages[batch->offset]); } - - if (unlikely(disable_hugepages)) - break; } out: ret = vfio_lock_acct(dma, lock_acct, false); unpin_out: - if (batch->size == 1 && !batch->offset) { - /* May be a VM_PFNMAP pfn, which the batch can't remember. */ - put_pfn(pfn, dma->prot); - batch->size = 0; - } - if (ret < 0) { if (pinned && !rsvd) { for (pfn = *pfn_base ; pinned ; pfn++, pinned--) @@ -712,7 +736,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, } static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, - unsigned long pfn, long npage, + unsigned long pfn, unsigned long npage, bool do_accounting) { long unlocked = 0, locked = 0; @@ -735,7 +759,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, unsigned long *pfn_base, bool do_accounting) { - struct page *pages[1]; + struct vfio_batch batch; struct mm_struct *mm; int ret; @@ -743,7 +767,9 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, if (!mmget_not_zero(mm)) return -ENODEV; - ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages); + vfio_batch_init_single(&batch); + + ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, &batch); if (ret != 1) goto out; @@ -762,6 +788,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, } out: + vfio_batch_fini(&batch); mmput(mm); return ret; } diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 429529f7a484c..245a99afe9ce8 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -158,6 +158,8 @@ extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, struct cifsFileInfo **ret_file); +extern int cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode, + struct file *file); extern unsigned int smbCalcSize(void *buf); extern int decode_negTokenInit(unsigned char *security_blob, int length, struct TCP_Server_Info *server); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index dd6cb08fd4231..254347d16db68 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -685,15 +685,23 @@ int cifs_open(struct inode *inode, struct file *file) rc = cifs_get_readable_path(tcon, full_path, &cfile); } if (rc == 0) { - if (file->f_flags == cfile->f_flags) { + unsigned int oflags = file->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); + unsigned int cflags = cfile->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); + + if (cifs_convert_flags(oflags, 0) == cifs_convert_flags(cflags, 0) && + (oflags & (O_SYNC|O_DIRECT)) == (cflags & (O_SYNC|O_DIRECT))) { file->private_data = cfile; spin_lock(&CIFS_I(inode)->deferred_lock); cifs_del_deferred_close(cfile); spin_unlock(&CIFS_I(inode)->deferred_lock); goto use_cache; - } else { - _cifsFileInfo_put(cfile, true, false); } + 
_cifsFileInfo_put(cfile, true, false); + } else { + /* hard link on the defeered close file */ + rc = cifs_get_hardlink_path(tcon, inode, file); + if (rc) + cifs_close_deferred_file(CIFS_I(inode)); } if (server->oplocks) @@ -1754,6 +1762,29 @@ cifs_move_llist(struct list_head *source, struct list_head *dest) list_move(li, dest); } +int +cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode, + struct file *file) +{ + struct cifsFileInfo *open_file = NULL; + struct cifsInodeInfo *cinode = CIFS_I(inode); + int rc = 0; + + spin_lock(&tcon->open_file_lock); + spin_lock(&cinode->open_file_lock); + + list_for_each_entry(open_file, &cinode->openFileList, flist) { + if (file->f_flags == open_file->f_flags) { + rc = -EINVAL; + break; + } + } + + spin_unlock(&cinode->open_file_lock); + spin_unlock(&tcon->open_file_lock); + return rc; +} + void cifs_free_llist(struct list_head *llist) { diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index fc789c0ac85b8..eaba832e03575 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -256,11 +256,6 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd); } -static inline bool is_huge_zero_pud(pud_t pud) -{ - return false; -} - struct page *mm_get_huge_zero_page(struct mm_struct *mm); void mm_put_huge_zero_page(struct mm_struct *mm); @@ -379,11 +374,6 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) return false; } -static inline bool is_huge_zero_pud(pud_t pud) -{ - return false; -} - static inline void mm_put_huge_zero_page(struct mm_struct *mm) { return; diff --git a/include/linux/mm.h b/include/linux/mm.h index 196c481ec1603..ab100f6bd25ad 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2427,15 +2427,42 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); -int follow_pte(struct mm_struct *mm, unsigned long address, - pte_t **ptepp, spinlock_t **ptlp); -int follow_pfn(struct vm_area_struct *vma, unsigned long address, - unsigned long *pfn); -int follow_phys(struct vm_area_struct *vma, unsigned long address, - unsigned int flags, unsigned long *prot, resource_size_t *phys); int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); +struct follow_pfnmap_args { + /** + * Inputs: + * @vma: Pointer to @vm_area_struct struct + * @address: the virtual address to walk + */ + struct vm_area_struct *vma; + unsigned long address; + /** + * Internals: + * + * The caller shouldn't touch any of these. 
+ */ + spinlock_t *lock; + pte_t *ptep; + /** + * Outputs: + * + * @pfn: the PFN of the address + * @addr_mask: address mask covering pfn + * @pgprot: the pgprot_t of the mapping + * @writable: whether the mapping is writable + * @special: whether the mapping is a special mapping (real PFN maps) + */ + unsigned long pfn; + unsigned long addr_mask; + pgprot_t pgprot; + bool writable; + bool special; +}; +int follow_pfnmap_start(struct follow_pfnmap_args *args); +void follow_pfnmap_end(struct follow_pfnmap_args *args); + extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); @@ -2730,6 +2757,30 @@ static inline pte_t pte_mkspecial(pte_t pte) } #endif +#ifndef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +static inline bool pmd_special(pmd_t pmd) +{ + return false; +} + +static inline pmd_t pmd_mkspecial(pmd_t pmd) +{ + return pmd; +} +#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */ + +#ifndef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP +static inline bool pud_special(pud_t pud) +{ + return false; +} + +static inline pud_t pud_mkspecial(pud_t pud) +{ + return pud; +} +#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ + #ifndef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t pte) { diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index f62a9c9f3ce10..73eca45d91a10 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1685,6 +1685,18 @@ typedef unsigned int pgtbl_mod_mask; #define MAX_PTRS_PER_P4D PTRS_PER_P4D #endif +#ifndef pte_pgprot +#define pte_pgprot(x) ((pgprot_t) {0}) +#endif + +#ifndef pmd_pgprot +#define pmd_pgprot(x) ((pgprot_t) {0}) +#endif + +#ifndef pud_pgprot +#define pud_pgprot(x) ((pgprot_t) {0}) +#endif + /* description of effects of mapping type and prot in current implementation. * this is due to the limited x86 page protection hardware. 
The expected * behavior is in parens: diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 30c79194eecc8..fbb472dd99b36 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -93,8 +93,6 @@ struct vfio_pci_core_device { struct list_head sriov_pfs_item; struct vfio_pci_core_device *sriov_pf_core_dev; struct notifier_block nb; - struct mutex vma_lock; - struct list_head vma_list; struct rw_semaphore memory_lock; }; diff --git a/mm/Kconfig b/mm/Kconfig index a91823e31f45b..ab0e794e3fc03 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -897,6 +897,19 @@ config READ_ONLY_THP_FOR_FS endif # TRANSPARENT_HUGEPAGE +# TODO: Allow to be enabled without THP +config ARCH_SUPPORTS_HUGE_PFNMAP + def_bool n + depends on TRANSPARENT_HUGEPAGE + +config ARCH_SUPPORTS_PMD_PFNMAP + def_bool y + depends on ARCH_SUPPORTS_HUGE_PFNMAP && HAVE_ARCH_TRANSPARENT_HUGEPAGE + +config ARCH_SUPPORTS_PUD_PFNMAP + def_bool y + depends on ARCH_SUPPORTS_HUGE_PFNMAP && HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + # # UP and nommu archs use km based percpu allocator # diff --git a/mm/gup.c b/mm/gup.c index ad7345cfba91d..16cdddef91585 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2903,6 +2903,9 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) return 0; + if (pmd_special(orig)) + return 0; + if (pmd_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; @@ -2947,6 +2950,9 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, if (!pud_access_permitted(orig, flags & FOLL_WRITE)) return 0; + if (pud_special(orig)) + return 0; + if (pud_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 20d9b3971dc88..c1cdbd21dddea 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -860,6 +860,8 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, entry = pmd_mkhuge(pfn_t_pmd(pfn, prot)); if (pfn_t_devmap(pfn)) entry = pmd_mkdevmap(entry); + else + entry = pmd_mkspecial(entry); if (write) { entry = pmd_mkyoung(pmd_mkdirty(entry)); entry = maybe_pmd_mkwrite(entry, vma); @@ -943,10 +945,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, ptl = pud_lock(mm, pud); if (!pud_none(*pud)) { if (write) { - if (pud_pfn(*pud) != pfn_t_to_pfn(pfn)) { - WARN_ON_ONCE(!is_huge_zero_pud(*pud)); + if (WARN_ON_ONCE(pud_pfn(*pud) != pfn_t_to_pfn(pfn))) goto out_unlock; - } entry = pud_mkyoung(*pud); entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma); if (pudp_set_access_flags(vma, addr, pud, entry, 1)) @@ -958,6 +958,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, entry = pud_mkhuge(pfn_t_pud(pfn, prot)); if (pfn_t_devmap(pfn)) entry = pud_mkdevmap(entry); + else + entry = pud_mkspecial(entry); if (write) { entry = pud_mkyoung(pud_mkdirty(entry)); entry = maybe_pud_mkwrite(entry, vma); @@ -1070,6 +1072,24 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pgtable_t pgtable = NULL; int ret = -ENOMEM; + pmd = pmdp_get_lockless(src_pmd); + if (unlikely(pmd_present(pmd) && pmd_special(pmd))) { + dst_ptl = pmd_lock(dst_mm, dst_pmd); + src_ptl = pmd_lockptr(src_mm, src_pmd); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); + /* + * No need to recheck the pmd, it can't change with write + * mmap lock held here. 
+ * + * Meanwhile, making sure it's not a CoW VMA with writable + * mapping, otherwise it means either the anon page wrongly + * applied special bit, or we made the PRIVATE mapping be + * able to wrongly write to the backend MMIO. + */ + VM_WARN_ON_ONCE(is_cow_mapping(src_vma->vm_flags) && pmd_write(pmd)); + goto set_pmd; + } + /* Skip if can be re-fill on fault */ if (!vma_is_anonymous(dst_vma)) return 0; @@ -1150,7 +1170,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmdp_set_wrprotect(src_mm, addr, src_pmd); if (!userfaultfd_wp(dst_vma)) pmd = pmd_clear_uffd_wp(pmd); - pmd = pmd_mkold(pmd_wrprotect(pmd)); + pmd = pmd_wrprotect(pmd); +set_pmd: + pmd = pmd_mkold(pmd); set_pmd_at(dst_mm, addr, dst_pmd, pmd); ret = 0; @@ -1235,21 +1257,15 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (unlikely(!pud_trans_huge(pud) && !pud_devmap(pud))) goto out_unlock; - /* - * When page table lock is held, the huge zero pud should not be - * under splitting since we don't split the page itself, only pud to - * a page table. - */ - if (is_huge_zero_pud(pud)) { - /* No huge zero pud yet */ - } - /* * TODO: once we support anonymous pages, use page_try_dup_anon_rmap() * and split if duplicating fails. */ - pudp_set_wrprotect(src_mm, addr, src_pud); - pud = pud_mkold(pud_wrprotect(pud)); + if (is_cow_mapping(vma->vm_flags) && pud_write(pud)) { + pudp_set_wrprotect(src_mm, addr, src_pud); + pud = pud_wrprotect(pud); + } + pud = pud_mkold(pud); set_pud_at(dst_mm, addr, dst_pud, pud); ret = 0; diff --git a/mm/memory.c b/mm/memory.c index e2794e3b8919b..0338ced72b7df 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -659,11 +659,10 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, { unsigned long pfn = pmd_pfn(pmd); - /* - * There is no pmd_special() but there may be special pmds, e.g. - * in a direct-access (dax) mapping, so let's just replicate the - * !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here. - */ + /* Currently it's only used for huge pfnmaps */ + if (unlikely(pmd_special(pmd))) + return NULL; + if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { if (vma->vm_flags & VM_MIXEDMAP) { if (!pfn_valid(pfn)) @@ -5607,130 +5606,159 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) } #endif /* __PAGETABLE_PMD_FOLDED */ +static inline void pfnmap_args_setup(struct follow_pfnmap_args *args, + spinlock_t *lock, pte_t *ptep, + pgprot_t pgprot, unsigned long pfn_base, + unsigned long addr_mask, bool writable, + bool special) +{ + args->lock = lock; + args->ptep = ptep; + args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT); + args->addr_mask = addr_mask; + args->pgprot = pgprot; + args->writable = writable; + args->special = special; +} + +static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma) +{ +#ifdef CONFIG_LOCKDEP + struct file *file = vma->vm_file; + struct address_space *mapping = file ? 
file->f_mapping : NULL; + + if (mapping) + lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) || + lockdep_is_held(&vma->vm_mm->mmap_lock)); + else + lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock)); +#endif +} + /** - * follow_pte - look up PTE at a user virtual address - * @mm: the mm_struct of the target address space - * @address: user virtual address - * @ptepp: location to store found PTE - * @ptlp: location to store the lock for the PTE + * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address + * @args: Pointer to struct @follow_pfnmap_args + * + * The caller needs to set up args->vma and args->address to point to the + * virtual address to be looked up. On a successful return, the results + * will be put into the other output fields. + * + * After the caller has finished using those fields, it must invoke + * follow_pfnmap_end() to properly release the locks and resources taken + * by the lookup. * - * On a successful return, the pointer to the PTE is stored in @ptepp; - * the corresponding lock is taken and its location is stored in @ptlp. - * The contents of the PTE are only stable until @ptlp is released; - * any further use, if any, must be protected against invalidation - * with MMU notifiers. + * Between the start() and end() calls, the results in @args are valid + * because the proper locks are held. After end() is called, all the + * fields in @follow_pfnmap_args become invalid and must not be accessed + * further. Any later use of that information requires the caller to + * synchronize against page table updates, otherwise it can create a + * security bug. + * + * If the PTE maps a refcounted page, callers are responsible for + * protecting against invalidation with MMU notifiers; otherwise access + * to the PFN at a later point in time can trigger use-after-free. * * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore - * should be taken for read. + * should be taken for read, and the mmap semaphore cannot be released + * before the end() is invoked. * - * KVM uses this function. While it is arguably less bad than ``follow_pfn``, - * it is not a good general-purpose API. + * This function must not be used to modify PTE content. * - * Return: zero on success, -ve otherwise. + * Return: zero on success, negative otherwise. 
*/ -int follow_pte(struct mm_struct *mm, unsigned long address, - pte_t **ptepp, spinlock_t **ptlp) +int follow_pfnmap_start(struct follow_pfnmap_args *args) { - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *ptep; + struct vm_area_struct *vma = args->vma; + unsigned long address = args->address; + struct mm_struct *mm = vma->vm_mm; + spinlock_t *lock; + pgd_t *pgdp; + p4d_t *p4dp, p4d; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; + pte_t *ptep, pte; - pgd = pgd_offset(mm, address); - if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + pfnmap_lockdep_assert(vma); + + if (unlikely(address < vma->vm_start || address >= vma->vm_end)) goto out; - p4d = p4d_offset(pgd, address); - if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + goto out; +retry: + pgdp = pgd_offset(mm, address); + if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp))) goto out; - pud = pud_offset(p4d, address); - if (pud_none(*pud) || unlikely(pud_bad(*pud))) + p4dp = p4d_offset(pgdp, address); + p4d = READ_ONCE(*p4dp); + if (p4d_none(p4d) || unlikely(p4d_bad(p4d))) goto out; - pmd = pmd_offset(pud, address); - VM_BUG_ON(pmd_trans_huge(*pmd)); + pudp = pud_offset(p4dp, address); + pud = READ_ONCE(*pudp); + if (pud_none(pud)) + goto out; + if (pud_leaf(pud)) { + lock = pud_lock(mm, pudp); + if (!unlikely(pud_leaf(pud))) { + spin_unlock(lock); + goto retry; + } + pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud), + pud_pfn(pud), PUD_MASK, pud_write(pud), + pud_special(pud)); + return 0; + } + + pmdp = pmd_offset(pudp, address); + pmd = pmdp_get_lockless(pmdp); + if (pmd_leaf(pmd)) { + lock = pmd_lock(mm, pmdp); + if (!unlikely(pmd_leaf(pmd))) { + spin_unlock(lock); + goto retry; + } + pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd), + pmd_pfn(pmd), PMD_MASK, pmd_write(pmd), + pmd_special(pmd)); + return 0; + } - ptep = pte_offset_map_lock(mm, pmd, address, ptlp); + ptep = pte_offset_map_lock(mm, pmdp, address, &lock); if (!ptep) goto out; - if (!pte_present(ptep_get(ptep))) + pte = ptep_get(ptep); + if (!pte_present(pte)) goto unlock; - *ptepp = ptep; + pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte), + pte_pfn(pte), PAGE_MASK, pte_write(pte), + pte_special(pte)); return 0; unlock: - pte_unmap_unlock(ptep, *ptlp); + pte_unmap_unlock(ptep, lock); out: return -EINVAL; } -EXPORT_SYMBOL_GPL(follow_pte); +EXPORT_SYMBOL_GPL(follow_pfnmap_start); /** - * follow_pfn - look up PFN at a user virtual address - * @vma: memory mapping - * @address: user virtual address - * @pfn: location to store found PFN - * - * Only IO mappings and raw PFN mappings are allowed. - * - * This function does not allow the caller to read the permissions - * of the PTE. Do not use it. + * follow_pfnmap_end(): End a follow_pfnmap_start() process + * @args: Pointer to struct @follow_pfnmap_args * - * Return: zero and the pfn at @pfn on success, -ve otherwise. + * Must be used in pair with follow_pfnmap_start(). See the start() function + * above for more information. 
*/ -int follow_pfn(struct vm_area_struct *vma, unsigned long address, - unsigned long *pfn) +void follow_pfnmap_end(struct follow_pfnmap_args *args) { - int ret = -EINVAL; - spinlock_t *ptl; - pte_t *ptep; - - if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) - return ret; - - ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); - if (ret) - return ret; - *pfn = pte_pfn(ptep_get(ptep)); - pte_unmap_unlock(ptep, ptl); - return 0; + if (args->lock) + spin_unlock(args->lock); + if (args->ptep) + pte_unmap(args->ptep); } -EXPORT_SYMBOL(follow_pfn); +EXPORT_SYMBOL_GPL(follow_pfnmap_end); #ifdef CONFIG_HAVE_IOREMAP_PROT -int follow_phys(struct vm_area_struct *vma, - unsigned long address, unsigned int flags, - unsigned long *prot, resource_size_t *phys) -{ - int ret = -EINVAL; - pte_t *ptep, pte; - spinlock_t *ptl; - - if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) - goto out; - - if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) - goto out; - pte = ptep_get(ptep); - - /* Never return PFNs of anon folios in COW mappings. */ - if (vm_normal_folio(vma, address, pte)) - goto unlock; - - if ((flags & FOLL_WRITE) && !pte_write(pte)) - goto unlock; - - *prot = pgprot_val(pte_pgprot(pte)); - *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; - - ret = 0; -unlock: - pte_unmap_unlock(ptep, ptl); -out: - return ret; -} - /** * generic_access_phys - generic implementation for iomem mmap access * @vma: the vma to access @@ -5749,37 +5777,34 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, resource_size_t phys_addr; unsigned long prot = 0; void __iomem *maddr; - pte_t *ptep, pte; - spinlock_t *ptl; int offset = offset_in_page(addr); int ret = -EINVAL; - - if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) - return -EINVAL; + bool writable; + struct follow_pfnmap_args args = { .vma = vma, .address = addr }; retry: - if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) + if (follow_pfnmap_start(&args)) return -EINVAL; - pte = ptep_get(ptep); - pte_unmap_unlock(ptep, ptl); + prot = pgprot_val(args.pgprot); + phys_addr = (resource_size_t)args.pfn << PAGE_SHIFT; + writable = args.writable; + follow_pfnmap_end(&args); - prot = pgprot_val(pte_pgprot(pte)); - phys_addr = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; - - if ((write & FOLL_WRITE) && !pte_write(pte)) + if ((write & FOLL_WRITE) && !writable) return -EINVAL; maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); if (!maddr) return -ENOMEM; - if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) + if (follow_pfnmap_start(&args)) goto out_unmap; - if (!pte_same(pte, ptep_get(ptep))) { - pte_unmap_unlock(ptep, ptl); + if ((prot != pgprot_val(args.pgprot)) || + (phys_addr != (args.pfn << PAGE_SHIFT)) || + (writable != args.writable)) { + follow_pfnmap_end(&args); iounmap(maddr); - goto retry; } @@ -5788,7 +5813,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, else memcpy_fromio(buf, maddr + offset, len); ret = len; - pte_unmap_unlock(ptep, ptl); + follow_pfnmap_end(&args); out_unmap: iounmap(maddr); diff --git a/mm/nommu.c b/mm/nommu.c index f3f6a7e976470..de9ecac05da5d 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -110,27 +110,6 @@ unsigned int kobjsize(const void *objp) return page_size(page); } -/** - * follow_pfn - look up PFN at a user virtual address - * @vma: memory mapping - * @address: user virtual address - * @pfn: location to store found PFN - * - * Only IO mappings and raw PFN mappings are allowed. - * - * Returns zero and the pfn at @pfn on success, -ve otherwise. 
- */ -int follow_pfn(struct vm_area_struct *vma, unsigned long address, - unsigned long *pfn) -{ - if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) - return -EINVAL; - - *pfn = address >> PAGE_SHIFT; - return 0; -} -EXPORT_SYMBOL(follow_pfn); - LIST_HEAD(vmap_area_list); void vfree(const void *addr) diff --git a/redhat/kernel.changelog-9.6 b/redhat/kernel.changelog-9.6 index 45aa1339d2d60..c750b086df2cd 100644 --- a/redhat/kernel.changelog-9.6 +++ b/redhat/kernel.changelog-9.6 @@ -1,3 +1,45 @@ +* Sat Jul 05 2025 CKI KWF Bot [5.14.0-570.26.1.el9_6] +- x86/microcode/AMD: Fix out-of-bounds on systems with CPU-less NUMA nodes (CKI Backport Bot) [RHEL-98996] {CVE-2025-21991} +- cpufreq: intel_pstate: Unchecked MSR aceess in legacy mode (David Arcari) [RHEL-90212] +- smb: client: fix perf regression with deferred closes (Paulo Alcantara) [RHEL-97482] +- smb3 client: fix open hardlink on deferred close file error (Paulo Alcantara) [RHEL-97482] +- Fix mmu notifiers for range-based invalidates (Jay Shin) [RHEL-93743] +- vfio/pci: Align huge faults to order (Alex Williamson) [RHEL-88275] +- vfio/type1: Use mapping page mask for pfnmaps (Alex Williamson) [RHEL-88275] +- mm: Provide address mask in struct follow_pfnmap_args (Alex Williamson) [RHEL-88275] +- vfio/type1: Use consistent types for page counts (Alex Williamson) [RHEL-88275] +- vfio/type1: Use vfio_batch for vaddr_get_pfns() (Alex Williamson) [RHEL-88275] +- vfio/type1: Convert all vaddr_get_pfns() callers to use vfio_batch (Alex Williamson) [RHEL-88275] +- vfio/type1: Catch zero from pin_user_pages_remote() (Alex Williamson) [RHEL-88275] +- vfio/pci: Fallback huge faults for unaligned pfn (Donald Dutile) [RHEL-85623] +- vfio/pci: implement huge_fault support (Donald Dutile) [RHEL-85623] +- vfio/pci: Remove unused struct 'vfio_pci_mmap_vma' (Donald Dutile) [RHEL-85623] +- vfio/pci: Insert full vma on mmap'd MMIO fault (Donald Dutile) [RHEL-85623] +- vfio/pci: Use unmap_mapping_range() (Donald Dutile) [RHEL-85623] +- mm/arm64: support large pfn mappings (Donald Dutile) [RHEL-85623] +- mm/x86: support large pfn mappings (Donald Dutile) [RHEL-85623] +- mm: remove follow_pte() (Donald Dutile) [RHEL-85623] +- mm: follow_pte() improvements (Donald Dutile) [RHEL-85623] +- mm/access_process_vm: use the new follow_pfnmap API (Donald Dutile) [RHEL-85623] +- vfio: use the new follow_pfnmap API (Donald Dutile) [RHEL-85623] +- mm/x86/pat: use the new follow_pfnmap API (Donald Dutile) [RHEL-85623] +- s390/pci_mmio: use follow_pfnmap API (Donald Dutile) [RHEL-85623] +- KVM: use follow_pfnmap API (Donald Dutile) [RHEL-85623] +- mm: pass VMA instead of MM to follow_pte() (Donald Dutile) [RHEL-85623] +- mm: move follow_phys to arch/x86/mm/pat/memtype.c (Donald Dutile) [RHEL-85623] +- mm: fix follow_pfnmap API lockdep assert (Donald Dutile) [RHEL-85623] +- mm: new follow_pfnmap API (Donald Dutile) [RHEL-85623] +- mm: remove follow_pfn (Donald Dutile) [RHEL-85623] +- mm: always define pxx_pgprot() (Donald Dutile) [RHEL-85623] +- mm/huge_memory: check pmd_special() only after pmd_present() (Donald Dutile) [RHEL-85623] +- mm/fork: accept huge pfnmap entries (Donald Dutile) [RHEL-85623] +- mm/pagewalk: check pfnmap for folio_walk_start() (Donald Dutile) [RHEL-85623] +- mm/gup: detect huge pfnmap entries in gup-fast (Donald Dutile) [RHEL-85623] +- mm: mark special bits for huge pfn mappings when inject (Donald Dutile) [RHEL-85623] +- mm: drop is_huge_zero_pud() (Donald Dutile) [RHEL-85623] +- mm: introduce ARCH_SUPPORTS_HUGE_PFNMAP and special bits to pmd/pud (Donald 
Dutile) [RHEL-85623] +Resolves: RHEL-85623, RHEL-88275, RHEL-90212, RHEL-93743, RHEL-97482, RHEL-98996 + * Sat Jun 28 2025 CKI KWF Bot [5.14.0-570.25.1.el9_6] - udf: Fix a slab-out-of-bounds write bug in udf_find_entry() (CKI Backport Bot) [RHEL-99124] {CVE-2022-49846} - vmxnet3: Fix malformed packet sizing in vmxnet3_process_xdp (CKI Backport Bot) [RHEL-97110] {CVE-2025-37799} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b163a079fe65e..279572294aba3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2878,13 +2878,11 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, unsigned long addr, bool write_fault, bool *writable, kvm_pfn_t *p_pfn) { + struct follow_pfnmap_args args = { .vma = vma, .address = addr }; kvm_pfn_t pfn; - pte_t *ptep; - pte_t pte; - spinlock_t *ptl; int r; - r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); + r = follow_pfnmap_start(&args); if (r) { /* * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does @@ -2899,21 +2897,19 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, if (r) return r; - r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); + r = follow_pfnmap_start(&args); if (r) return r; } - pte = ptep_get(ptep); - - if (write_fault && !pte_write(pte)) { + if (write_fault && !args.writable) { pfn = KVM_PFN_ERR_RO_FAULT; goto out; } if (writable) - *writable = pte_write(pte); - pfn = pte_pfn(pte); + *writable = args.writable; + pfn = args.pfn; /* * Get a reference here because callers of *hva_to_pfn* and @@ -2934,9 +2930,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, */ if (!kvm_try_get_pfn(pfn)) r = -EFAULT; - out: - pte_unmap_unlock(ptep, ptl); + follow_pfnmap_end(&args); *p_pfn = pfn; return r;
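
The backport above replaces follow_pte()/follow_pfn() with the follow_pfnmap_start()/follow_pfnmap_end() pair, which callers use in a start/end pattern while holding the mmap lock. The following is a minimal sketch of such a caller, mirroring the generic_access_phys() and KVM conversions in this series; it is illustrative only and not part of the patch, and the helper name lookup_mmio_pfn() is hypothetical.

#include <linux/mm.h>

/*
 * Sketch of a follow_pfnmap user: resolve the PFN backing an IO/PFN
 * mapping at @addr in @vma.  Assumes the caller holds the mmap lock for
 * read, as required by follow_pfnmap_start().
 */
static int lookup_mmio_pfn(struct vm_area_struct *vma, unsigned long addr,
			   bool need_write, unsigned long *pfn)
{
	struct follow_pfnmap_args args = { .vma = vma, .address = addr };
	int ret;

	ret = follow_pfnmap_start(&args);
	if (ret)
		return ret;	/* not a present IO/PFN mapping */

	if (need_write && !args.writable) {
		follow_pfnmap_end(&args);
		return -EFAULT;
	}

	/* args.pfn/pgprot/addr_mask are only stable until the end() call. */
	*pfn = args.pfn;
	follow_pfnmap_end(&args);
	return 0;
}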