@@ -105,7 +105,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
105105/*
106106 * The following only work if pte_present(). Undefined behaviour otherwise.
107107 */
108- #define pte_present (pte ) (!!(pte_val( pte) & (PTE_VALID | PTE_PROT_NONE) ))
108+ #define pte_present (pte ) (pte_valid( pte) || pte_present_invalid(pte ))
109109#define pte_young (pte ) (!!(pte_val(pte) & PTE_AF))
110110#define pte_special (pte ) (!!(pte_val(pte) & PTE_SPECIAL))
111111#define pte_write (pte ) (!!(pte_val(pte) & PTE_WRITE))
@@ -132,6 +132,8 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
132132#define pte_dirty (pte ) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
133133
134134#define pte_valid (pte ) (!!(pte_val(pte) & PTE_VALID))
135+ #define pte_present_invalid (pte ) \
136+ ((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID)
135137/*
136138 * Execute-only user mappings do not have the PTE_USER bit set. All valid
137139 * kernel mappings have the PTE_UXN bit set.
@@ -261,6 +263,13 @@ static inline pte_t pte_mkpresent(pte_t pte)
261263 return set_pte_bit (pte , __pgprot (PTE_VALID ));
262264}
263265
266+ static inline pte_t pte_mkinvalid (pte_t pte )
267+ {
268+ pte = set_pte_bit (pte , __pgprot (PTE_PRESENT_INVALID ));
269+ pte = clear_pte_bit (pte , __pgprot (PTE_VALID ));
270+ return pte ;
271+ }
272+
264273static inline pmd_t pmd_mkcont (pmd_t pmd )
265274{
266275 return __pmd (pmd_val (pmd ) | PMD_SECT_CONT );
@@ -271,9 +280,31 @@ static inline pte_t pte_mkdevmap(pte_t pte)
271280 return set_pte_bit (pte , __pgprot (PTE_DEVMAP | PTE_SPECIAL ));
272281}
273282
274- static inline void __set_pte (pte_t * ptep , pte_t pte )
283+ #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
284+ static inline int pte_uffd_wp (pte_t pte )
285+ {
286+ return !!(pte_val (pte ) & PTE_UFFD_WP );
287+ }
288+
289+ static inline pte_t pte_mkuffd_wp (pte_t pte )
290+ {
291+ return pte_wrprotect (set_pte_bit (pte , __pgprot (PTE_UFFD_WP )));
292+ }
293+
294+ static inline pte_t pte_clear_uffd_wp (pte_t pte )
295+ {
296+ return clear_pte_bit (pte , __pgprot (PTE_UFFD_WP ));
297+ }
298+ #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
299+
300+ static inline void __set_pte_nosync (pte_t * ptep , pte_t pte )
275301{
276302 WRITE_ONCE (* ptep , pte );
303+ }
304+
305+ static inline void __set_pte (pte_t * ptep , pte_t pte )
306+ {
307+ __set_pte_nosync (ptep , pte );
277308
278309 /*
279310 * Only if the new pte is valid and kernel, otherwise TLB maintenance
@@ -463,13 +494,39 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
463494 return clear_pte_bit (pte , __pgprot (PTE_SWP_EXCLUSIVE ));
464495}
465496
497+ #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
498+ static inline pte_t pte_swp_mkuffd_wp (pte_t pte )
499+ {
500+ return set_pte_bit (pte , __pgprot (PTE_SWP_UFFD_WP ));
501+ }
502+
503+ static inline int pte_swp_uffd_wp (pte_t pte )
504+ {
505+ return !!(pte_val (pte ) & PTE_SWP_UFFD_WP );
506+ }
507+
508+ static inline pte_t pte_swp_clear_uffd_wp (pte_t pte )
509+ {
510+ return clear_pte_bit (pte , __pgprot (PTE_SWP_UFFD_WP ));
511+ }
512+ #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
513+
466514#ifdef CONFIG_NUMA_BALANCING
467515/*
468516 * See the comment in include/linux/pgtable.h
469517 */
470518static inline int pte_protnone (pte_t pte )
471519{
472- return (pte_val (pte ) & (PTE_VALID | PTE_PROT_NONE )) == PTE_PROT_NONE ;
520+ /*
521+ * pte_present_invalid() tells us that the pte is invalid from HW
522+ * perspective but present from SW perspective, so the fields are to be
523+ * interpreted as per the HW layout. The second 2 checks are the unique
524+ * encoding that we use for PROT_NONE. It is insufficient to only use
525+ * the first check because we share the same encoding scheme with pmds
526+ * which support pmd_mkinvalid(), so can be present-invalid without
527+ * being PROT_NONE.
528+ */
529+ return pte_present_invalid (pte ) && !pte_user (pte ) && !pte_user_exec (pte );
473530}
474531
475532static inline int pmd_protnone (pmd_t pmd )
@@ -478,12 +535,7 @@ static inline int pmd_protnone(pmd_t pmd)
478535}
479536#endif
480537
481- #define pmd_present_invalid (pmd ) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID))
482-
483- static inline int pmd_present (pmd_t pmd )
484- {
485- return pte_present (pmd_pte (pmd )) || pmd_present_invalid (pmd );
486- }
538+ #define pmd_present (pmd ) pte_present(pmd_pte(pmd))
487539
488540/*
489541 * THP definitions.
@@ -508,14 +560,16 @@ static inline int pmd_trans_huge(pmd_t pmd)
508560#define pmd_mkclean (pmd ) pte_pmd(pte_mkclean(pmd_pte(pmd)))
509561#define pmd_mkdirty (pmd ) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
510562#define pmd_mkyoung (pmd ) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
511-
512- static inline pmd_t pmd_mkinvalid (pmd_t pmd )
513- {
514- pmd = set_pmd_bit (pmd , __pgprot (PMD_PRESENT_INVALID ));
515- pmd = clear_pmd_bit (pmd , __pgprot (PMD_SECT_VALID ));
516-
517- return pmd ;
518- }
563+ #define pmd_mkinvalid (pmd ) pte_pmd(pte_mkinvalid(pmd_pte(pmd)))
564+ #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
565+ #define pmd_uffd_wp (pmd ) pte_uffd_wp(pmd_pte(pmd))
566+ #define pmd_mkuffd_wp (pmd ) pte_pmd(pte_mkuffd_wp(pmd_pte(pmd)))
567+ #define pmd_clear_uffd_wp (pmd ) pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd)))
568+ #define pmd_swp_uffd_wp (pmd ) pte_swp_uffd_wp(pmd_pte(pmd))
569+ #define pmd_swp_mkuffd_wp (pmd ) pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd)))
570+ #define pmd_swp_clear_uffd_wp (pmd ) \
571+ pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd)))
572+ #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
519573
520574#define pmd_thp_or_huge (pmd ) (pmd_huge(pmd) || pmd_trans_huge(pmd))
521575
@@ -760,6 +814,7 @@ static inline pmd_t *pud_pgtable(pud_t pud)
760814
761815#else
762816
817+ #define pud_valid (pud ) false
763818#define pud_page_paddr (pud ) ({ BUILD_BUG(); 0; })
764819#define pud_user_exec (pud ) pud_user(pud) /* Always 0 with folding */
765820
@@ -1005,6 +1060,8 @@ static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
10051060
10061061static inline bool pgtable_l5_enabled (void ) { return false; }
10071062
1063+ #define p4d_index (addr ) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
1064+
10081065/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */
10091066#define p4d_set_fixmap (addr ) NULL
10101067#define p4d_set_fixmap_offset (p4dp , addr ) ((p4d_t *)p4dp)
@@ -1027,8 +1084,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
10271084 * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK.
10281085 */
10291086 const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
1030- PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP |
1031- PTE_ATTRINDX_MASK ;
1087+ PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE |
1088+ PTE_GP | PTE_ATTRINDX_MASK ;
10321089 /* preserve the hardware dirty information */
10331090 if (pte_hw_dirty (pte ))
10341091 pte = set_pte_bit (pte , __pgprot (PTE_DIRTY ));
@@ -1076,17 +1133,17 @@ static inline int pgd_devmap(pgd_t pgd)
10761133#ifdef CONFIG_PAGE_TABLE_CHECK
10771134static inline bool pte_user_accessible_page (pte_t pte )
10781135{
1079- return pte_present (pte ) && (pte_user (pte ) || pte_user_exec (pte ));
1136+ return pte_valid (pte ) && (pte_user (pte ) || pte_user_exec (pte ));
10801137}
10811138
10821139static inline bool pmd_user_accessible_page (pmd_t pmd )
10831140{
1084- return pmd_leaf (pmd ) && !pmd_present_invalid (pmd ) && (pmd_user (pmd ) || pmd_user_exec (pmd ));
1141+ return pmd_valid (pmd ) && !pmd_table (pmd ) && (pmd_user (pmd ) || pmd_user_exec (pmd ));
10851142}
10861143
10871144static inline bool pud_user_accessible_page (pud_t pud )
10881145{
1089- return pud_leaf (pud ) && (pud_user (pud ) || pud_user_exec (pud ));
1146+ return pud_valid ( pud ) && ! pud_table (pud ) && (pud_user (pud ) || pud_user_exec (pud ));
10901147}
10911148#endif
10921149
@@ -1248,15 +1305,16 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
12481305 * Encode and decode a swap entry:
12491306 * bits 0-1: present (must be zero)
12501307 * bit 2: remember PG_anon_exclusive
1251- * bits 3-7: swap type
1252- * bits 8-57: swap offset
1253- * bit 58: PTE_PROT_NONE (must be zero)
1308+ * bit 3: remember uffd-wp state
1309+ * bits 6-10: swap type
1310+ * bit 11: PTE_PRESENT_INVALID (must be zero)
1311+ * bits 12-61: swap offset
12541312 */
1255- #define __SWP_TYPE_SHIFT 3
1313+ #define __SWP_TYPE_SHIFT 6
12561314#define __SWP_TYPE_BITS 5
1257- #define __SWP_OFFSET_BITS 50
12581315#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1)
1259- #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
1316+ #define __SWP_OFFSET_SHIFT 12
1317+ #define __SWP_OFFSET_BITS 50
12601318#define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1)
12611319
12621320#define __swp_type (x ) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
0 commit comments