Skip to content

Commit bb3aadf

Browse files
committed
x86/mm: Start actually marking _PAGE_SAVED_DIRTY
The recently introduced _PAGE_SAVED_DIRTY should be used instead of the HW Dirty bit whenever a PTE is Write=0, in order to not inadvertently create shadow stack PTEs. Update pte_mk*() helpers to do this, and apply the same changes to pmd and pud. Since there is no x86 version of pte_mkwrite() to hold this arch-specific logic, create one. Add it to x86/mm/pgtable.c instead of x86/asm/include/pgtable.h as future patches will require it to live in pgtable.c and it will make the diff easier for reviewers. Since CPUs without shadow stack support could create Write=0,Dirty=1 PTEs, only return true for pte_shstk() if the CPU also supports shadow stack. This will prevent these HW-created PTEs from showing as true for pte_write(). For pte_modify() this is a bit trickier. It takes a "raw" pgprot_t which was not necessarily created with any of the existing PTE bit helpers. That means that it can return a pte_t with Write=0,Dirty=1, a shadow stack PTE, when it did not intend to create one. Modify it to also move _PAGE_DIRTY to _PAGE_SAVED_DIRTY. To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid: 1. Marking Write=0 PTEs Dirty=1 2. Marking Dirty=1 PTEs Write=0 The first case cannot happen as the existing behavior of pte_modify() is to filter out any Dirty bit passed in newprot. Handle the second case by shifting _PAGE_DIRTY=1 to _PAGE_SAVED_DIRTY=1 if the PTE was write protected by the pte_modify() call. Apply the same changes to pmd_modify(). Co-developed-by: Yu-cheng Yu <[email protected]> Signed-off-by: Yu-cheng Yu <[email protected]> Signed-off-by: Rick Edgecombe <[email protected]> Signed-off-by: Dave Hansen <[email protected]> Acked-by: Mike Rapoport (IBM) <[email protected]> Tested-by: Pengfei Xu <[email protected]> Tested-by: John Allen <[email protected]> Tested-by: Kees Cook <[email protected]> Link: https://lore.kernel.org/all/20230613001108.3040476-13-rick.p.edgecombe%40intel.com
1 parent 1f6f66f commit bb3aadf

File tree

2 files changed

+144
-21
lines changed

2 files changed

+144
-21
lines changed

arch/x86/include/asm/pgtable.h

Lines changed: 130 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -125,19 +125,32 @@ extern pmdval_t early_pmd_flags;
125125
* The following only work if pte_present() is true.
126126
* Undefined behaviour if not..
127127
*/
128-
static inline int pte_dirty(pte_t pte)
128+
static inline bool pte_dirty(pte_t pte)
129129
{
130-
return pte_flags(pte) & _PAGE_DIRTY;
130+
return pte_flags(pte) & _PAGE_DIRTY_BITS;
131+
}
132+
133+
static inline bool pte_shstk(pte_t pte)
134+
{
135+
return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
136+
(pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY;
131137
}
132138

133139
static inline int pte_young(pte_t pte)
134140
{
135141
return pte_flags(pte) & _PAGE_ACCESSED;
136142
}
137143

138-
static inline int pmd_dirty(pmd_t pmd)
144+
static inline bool pmd_dirty(pmd_t pmd)
145+
{
146+
return pmd_flags(pmd) & _PAGE_DIRTY_BITS;
147+
}
148+
149+
static inline bool pmd_shstk(pmd_t pmd)
139150
{
140-
return pmd_flags(pmd) & _PAGE_DIRTY;
151+
return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
152+
(pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) ==
153+
(_PAGE_DIRTY | _PAGE_PSE);
141154
}
142155

143156
#define pmd_young pmd_young
@@ -146,9 +159,9 @@ static inline int pmd_young(pmd_t pmd)
146159
return pmd_flags(pmd) & _PAGE_ACCESSED;
147160
}
148161

149-
static inline int pud_dirty(pud_t pud)
162+
static inline bool pud_dirty(pud_t pud)
150163
{
151-
return pud_flags(pud) & _PAGE_DIRTY;
164+
return pud_flags(pud) & _PAGE_DIRTY_BITS;
152165
}
153166

154167
static inline int pud_young(pud_t pud)
@@ -158,13 +171,21 @@ static inline int pud_young(pud_t pud)
158171

159172
static inline int pte_write(pte_t pte)
160173
{
161-
return pte_flags(pte) & _PAGE_RW;
174+
/*
175+
* Shadow stack pages are logically writable, but do not have
176+
* _PAGE_RW. Check for them separately from _PAGE_RW itself.
177+
*/
178+
return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte);
162179
}
163180

164181
#define pmd_write pmd_write
165182
static inline int pmd_write(pmd_t pmd)
166183
{
167-
return pmd_flags(pmd) & _PAGE_RW;
184+
/*
185+
* Shadow stack pages are logically writable, but do not have
186+
* _PAGE_RW. Check for them separately from _PAGE_RW itself.
187+
*/
188+
return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd);
168189
}
169190

170191
#define pud_write pud_write
@@ -351,7 +372,14 @@ static inline pte_t pte_clear_saveddirty(pte_t pte)
351372

352373
static inline pte_t pte_wrprotect(pte_t pte)
353374
{
354-
return pte_clear_flags(pte, _PAGE_RW);
375+
pte = pte_clear_flags(pte, _PAGE_RW);
376+
377+
/*
378+
* Blindly clearing _PAGE_RW might accidentally create
379+
* a shadow stack PTE (Write=0,Dirty=1). Move the hardware
380+
* dirty value to the software bit, if present.
381+
*/
382+
return pte_mksaveddirty(pte);
355383
}
356384

357385
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
@@ -389,7 +417,7 @@ static inline pte_t pte_clear_uffd_wp(pte_t pte)
389417

390418
static inline pte_t pte_mkclean(pte_t pte)
391419
{
392-
return pte_clear_flags(pte, _PAGE_DIRTY);
420+
return pte_clear_flags(pte, _PAGE_DIRTY_BITS);
393421
}
394422

395423
static inline pte_t pte_mkold(pte_t pte)
@@ -404,7 +432,16 @@ static inline pte_t pte_mkexec(pte_t pte)
404432

405433
static inline pte_t pte_mkdirty(pte_t pte)
406434
{
407-
return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
435+
pte = pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
436+
437+
return pte_mksaveddirty(pte);
438+
}
439+
440+
static inline pte_t pte_mkwrite_shstk(pte_t pte)
441+
{
442+
pte = pte_clear_flags(pte, _PAGE_RW);
443+
444+
return pte_set_flags(pte, _PAGE_DIRTY);
408445
}
409446

410447
static inline pte_t pte_mkyoung(pte_t pte)
@@ -417,6 +454,10 @@ static inline pte_t pte_mkwrite_novma(pte_t pte)
417454
return pte_set_flags(pte, _PAGE_RW);
418455
}
419456

457+
struct vm_area_struct;
458+
pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma);
459+
#define pte_mkwrite pte_mkwrite
460+
420461
static inline pte_t pte_mkhuge(pte_t pte)
421462
{
422463
return pte_set_flags(pte, _PAGE_PSE);
@@ -481,7 +522,14 @@ static inline pmd_t pmd_clear_saveddirty(pmd_t pmd)
481522

482523
static inline pmd_t pmd_wrprotect(pmd_t pmd)
483524
{
484-
return pmd_clear_flags(pmd, _PAGE_RW);
525+
pmd = pmd_clear_flags(pmd, _PAGE_RW);
526+
527+
/*
528+
* Blindly clearing _PAGE_RW might accidentally create
529+
* a shadow stack PMD (RW=0, Dirty=1). Move the hardware
530+
* dirty value to the software bit.
531+
*/
532+
return pmd_mksaveddirty(pmd);
485533
}
486534

487535
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
@@ -508,12 +556,21 @@ static inline pmd_t pmd_mkold(pmd_t pmd)
508556

509557
static inline pmd_t pmd_mkclean(pmd_t pmd)
510558
{
511-
return pmd_clear_flags(pmd, _PAGE_DIRTY);
559+
return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS);
512560
}
513561

514562
static inline pmd_t pmd_mkdirty(pmd_t pmd)
515563
{
516-
return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
564+
pmd = pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
565+
566+
return pmd_mksaveddirty(pmd);
567+
}
568+
569+
static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd)
570+
{
571+
pmd = pmd_clear_flags(pmd, _PAGE_RW);
572+
573+
return pmd_set_flags(pmd, _PAGE_DIRTY);
517574
}
518575

519576
static inline pmd_t pmd_mkdevmap(pmd_t pmd)
@@ -536,6 +593,9 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
536593
return pmd_set_flags(pmd, _PAGE_RW);
537594
}
538595

596+
pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
597+
#define pmd_mkwrite pmd_mkwrite
598+
539599
static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
540600
{
541601
pudval_t v = native_pud_val(pud);
@@ -575,17 +635,26 @@ static inline pud_t pud_mkold(pud_t pud)
575635

576636
static inline pud_t pud_mkclean(pud_t pud)
577637
{
578-
return pud_clear_flags(pud, _PAGE_DIRTY);
638+
return pud_clear_flags(pud, _PAGE_DIRTY_BITS);
579639
}
580640

581641
static inline pud_t pud_wrprotect(pud_t pud)
582642
{
583-
return pud_clear_flags(pud, _PAGE_RW);
643+
pud = pud_clear_flags(pud, _PAGE_RW);
644+
645+
/*
646+
* Blindly clearing _PAGE_RW might accidentally create
647+
* a shadow stack PUD (RW=0, Dirty=1). Move the hardware
648+
* dirty value to the software bit.
649+
*/
650+
return pud_mksaveddirty(pud);
584651
}
585652

586653
static inline pud_t pud_mkdirty(pud_t pud)
587654
{
588-
return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
655+
pud = pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
656+
657+
return pud_mksaveddirty(pud);
589658
}
590659

591660
static inline pud_t pud_mkdevmap(pud_t pud)
@@ -605,7 +674,9 @@ static inline pud_t pud_mkyoung(pud_t pud)
605674

606675
static inline pud_t pud_mkwrite(pud_t pud)
607676
{
608-
return pud_set_flags(pud, _PAGE_RW);
677+
pud = pud_set_flags(pud, _PAGE_RW);
678+
679+
return pud_clear_saveddirty(pud);
609680
}
610681

611682
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -722,6 +793,7 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
722793
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
723794
{
724795
pteval_t val = pte_val(pte), oldval = val;
796+
pte_t pte_result;
725797

726798
/*
727799
* Chop off the NX bit (if present), and add the NX portion of
@@ -730,17 +802,54 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
730802
val &= _PAGE_CHG_MASK;
731803
val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
732804
val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
733-
return __pte(val);
805+
806+
pte_result = __pte(val);
807+
808+
/*
809+
* To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid:
810+
* 1. Marking Write=0 PTEs Dirty=1
811+
* 2. Marking Dirty=1 PTEs Write=0
812+
*
813+
* The first case cannot happen because the _PAGE_CHG_MASK will filter
814+
* out any Dirty bit passed in newprot. Handle the second case by
815+
* going through the mksaveddirty exercise. Only do this if the old
816+
* value was Write=1 to avoid doing this on Shadow Stack PTEs.
817+
*/
818+
if (oldval & _PAGE_RW)
819+
pte_result = pte_mksaveddirty(pte_result);
820+
else
821+
pte_result = pte_clear_saveddirty(pte_result);
822+
823+
return pte_result;
734824
}
735825

736826
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
737827
{
738828
pmdval_t val = pmd_val(pmd), oldval = val;
829+
pmd_t pmd_result;
739830

740-
val &= _HPAGE_CHG_MASK;
831+
val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY);
741832
val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
742833
val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
743-
return __pmd(val);
834+
835+
pmd_result = __pmd(val);
836+
837+
/*
838+
* To avoid creating Write=0,Dirty=1 PMDs, pmd_modify() needs to avoid:
839+
* 1. Marking Write=0 PMDs Dirty=1
840+
* 2. Marking Dirty=1 PMDs Write=0
841+
*
842+
* The first case cannot happen because the _HPAGE_CHG_MASK will filter
843+
* out any Dirty bit passed in newprot. Handle the second case by
844+
* going through the mksaveddirty exercise. Only do this if the old
845+
* value was Write=1 to avoid doing this on Shadow Stack PMDs.
846+
*/
847+
if (oldval & _PAGE_RW)
848+
pmd_result = pmd_mksaveddirty(pmd_result);
849+
else
850+
pmd_result = pmd_clear_saveddirty(pmd_result);
851+
852+
return pmd_result;
744853
}
745854

746855
/*

arch/x86/mm/pgtable.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,3 +872,17 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
872872

873873
#endif /* CONFIG_X86_64 */
874874
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
875+
876+
pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
877+
{
878+
pte = pte_mkwrite_novma(pte);
879+
880+
return pte_clear_saveddirty(pte);
881+
}
882+
883+
pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
884+
{
885+
pmd = pmd_mkwrite_novma(pmd);
886+
887+
return pmd_clear_saveddirty(pmd);
888+
}

0 commit comments

Comments
 (0)