Skip to content

Commit 03e6db1

Browse files
gerald-schaefer authored and hcahca committed
s390/mm: Introduce region-third and segment table entry present bits
Introduce region-third and segment table entry present SW bits, and adjust pmd/pud_present() accordingly.

Also add pmd/pud_present() checks to pmd/pud_leaf(), to return false for future swap entries. Same logic applies to pmd_trans_huge(), make that return pmd_leaf() instead of duplicating the same check.

huge_pte_offset() also needs to be adjusted, current code would return NULL for !pud_present(). Use the same logic as in the generic version, which allows for !pud_present() swap entries.

Similar to PTE, bit 63 can be used for the new SW present bit in region and segment table entries. For segment-table entries (PMD) the architecture says that "Bits 62-63 are available for programming", so they are safe to use. The same is true for large leaf region-third-table entries (PUD). However, for non-leaf region-third-table entries, bits 62-63 indicate the TABLE LENGTH and both must be set to 1. But such entries would always be considered as present, so it is safe to use bit 63 as PRESENT bit for PUD. They also should not conflict with bit 62 potentially later used for preserving SOFT_DIRTY in swap entries, because they are not swap entries.

Valid PMDs / PUDs should always have the present bit set, so add it to the various pgprot defines, and also _SEGMENT_ENTRY which is OR'ed e.g. in pmd_populate(). _REGION3_ENTRY wouldn't need any change, as the present bit is already included in the TABLE LENGTH, but also explicitly add it there, for completeness, and just in case the bit would ever be changed.

gmap code needs some adjustment, to also OR the _SEGMENT_ENTRY, like it is already done in gmap_shadow_pgt() when creating new PMDs, but not in __gmap_link(). Otherwise, the gmap PMDs would not be considered present, e.g. when using pmd_leaf() checks in gmap code. The various WARN_ON checks in gmap code also need adjustment, to tolerate the new present bit.
This is a prerequisite for hugetlbfs PTE_MARKER support on s390, which is needed to fix a regression introduced with commit 8a13897 ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs"). That commit depends on the availability of swap entries for hugetlbfs, which were not available for s390 so far.

Reviewed-by: Alexander Gordeev <[email protected]>
Signed-off-by: Gerald Schaefer <[email protected]>
Signed-off-by: Heiko Carstens <[email protected]>
1 parent ae1b9fb commit 03e6db1

File tree

3 files changed

+47
-24
lines changed

3 files changed

+47
-24
lines changed

arch/s390/include/asm/pgtable.h

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,8 @@ static inline int is_module_addr(void *addr)
277277
#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
278278
#define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
279279
#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
280-
#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
280+
#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH | \
281+
_REGION3_ENTRY_PRESENT)
281282
#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
282283

283284
#define _REGION3_ENTRY_HARDWARE_BITS 0xfffffffffffff6ffUL
@@ -297,6 +298,14 @@ static inline int is_module_addr(void *addr)
297298

298299
#define _REGION_ENTRY_BITS 0xfffffffffffff22fUL
299300

301+
/*
302+
* SW region present bit. For non-leaf region-third-table entries, bits 62-63
303+
* indicate the TABLE LENGTH and both must be set to 1. But such entries
304+
* would always be considered as present, so it is safe to use bit 63 as
305+
* PRESENT bit for PUD.
306+
*/
307+
#define _REGION3_ENTRY_PRESENT 0x0001
308+
300309
/* Bits in the segment table entry */
301310
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe3fUL
302311
#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe3cUL
@@ -308,7 +317,7 @@ static inline int is_module_addr(void *addr)
308317
#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
309318
#define _SEGMENT_ENTRY_TYPE_MASK 0x0c /* segment table type mask */
310319

311-
#define _SEGMENT_ENTRY (0)
320+
#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PRESENT)
312321
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
313322

314323
#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */
@@ -324,6 +333,8 @@ static inline int is_module_addr(void *addr)
324333
#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
325334
#endif
326335

336+
#define _SEGMENT_ENTRY_PRESENT 0x0001 /* SW segment present bit */
337+
327338
#define _CRST_ENTRIES 2048 /* number of region/segment table entries */
328339
#define _PAGE_ENTRIES 256 /* number of page table entries */
329340

@@ -455,17 +466,22 @@ static inline int is_module_addr(void *addr)
455466
/*
456467
* Segment entry (large page) protection definitions.
457468
*/
458-
#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
469+
#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \
470+
_SEGMENT_ENTRY_INVALID | \
459471
_SEGMENT_ENTRY_PROTECT)
460-
#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PROTECT | \
472+
#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \
473+
_SEGMENT_ENTRY_PROTECT | \
461474
_SEGMENT_ENTRY_READ | \
462475
_SEGMENT_ENTRY_NOEXEC)
463-
#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PROTECT | \
476+
#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \
477+
_SEGMENT_ENTRY_PROTECT | \
464478
_SEGMENT_ENTRY_READ)
465-
#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_READ | \
479+
#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \
480+
_SEGMENT_ENTRY_READ | \
466481
_SEGMENT_ENTRY_WRITE | \
467482
_SEGMENT_ENTRY_NOEXEC)
468-
#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_READ | \
483+
#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \
484+
_SEGMENT_ENTRY_READ | \
469485
_SEGMENT_ENTRY_WRITE)
470486
#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \
471487
_SEGMENT_ENTRY_LARGE | \
@@ -492,19 +508,22 @@ static inline int is_module_addr(void *addr)
492508
*/
493509

494510
#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \
511+
_REGION3_ENTRY_PRESENT | \
495512
_REGION3_ENTRY_LARGE | \
496513
_REGION3_ENTRY_READ | \
497514
_REGION3_ENTRY_WRITE | \
498515
_REGION3_ENTRY_YOUNG | \
499516
_REGION3_ENTRY_DIRTY | \
500517
_REGION_ENTRY_NOEXEC)
501518
#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \
519+
_REGION3_ENTRY_PRESENT | \
502520
_REGION3_ENTRY_LARGE | \
503521
_REGION3_ENTRY_READ | \
504522
_REGION3_ENTRY_YOUNG | \
505523
_REGION_ENTRY_PROTECT | \
506524
_REGION_ENTRY_NOEXEC)
507525
#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \
526+
_REGION3_ENTRY_PRESENT | \
508527
_REGION3_ENTRY_LARGE | \
509528
_REGION3_ENTRY_READ | \
510529
_REGION3_ENTRY_WRITE | \
@@ -747,7 +766,7 @@ static inline int pud_present(pud_t pud)
747766
{
748767
if (pud_folded(pud))
749768
return 1;
750-
return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
769+
return (pud_val(pud) & _REGION3_ENTRY_PRESENT) != 0;
751770
}
752771

753772
static inline int pud_none(pud_t pud)
@@ -762,13 +781,18 @@ static inline bool pud_leaf(pud_t pud)
762781
{
763782
if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
764783
return 0;
765-
return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
784+
return (pud_present(pud) && (pud_val(pud) & _REGION3_ENTRY_LARGE) != 0);
785+
}
786+
787+
static inline int pmd_present(pmd_t pmd)
788+
{
789+
return (pmd_val(pmd) & _SEGMENT_ENTRY_PRESENT) != 0;
766790
}
767791

768792
#define pmd_leaf pmd_leaf
769793
static inline bool pmd_leaf(pmd_t pmd)
770794
{
771-
return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
795+
return (pmd_present(pmd) && (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0);
772796
}
773797

774798
static inline int pmd_bad(pmd_t pmd)
@@ -800,11 +824,6 @@ static inline int p4d_bad(p4d_t p4d)
800824
return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
801825
}
802826

803-
static inline int pmd_present(pmd_t pmd)
804-
{
805-
return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
806-
}
807-
808827
static inline int pmd_none(pmd_t pmd)
809828
{
810829
return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;
@@ -1852,7 +1871,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
18521871

18531872
static inline int pmd_trans_huge(pmd_t pmd)
18541873
{
1855-
return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
1874+
return pmd_leaf(pmd);
18561875
}
18571876

18581877
#define has_transparent_hugepage has_transparent_hugepage

arch/s390/mm/gmap.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -587,7 +587,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
587587
if (pmd_leaf(*pmd)) {
588588
*table = (pmd_val(*pmd) &
589589
_SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
590-
| _SEGMENT_ENTRY_GMAP_UC;
590+
| _SEGMENT_ENTRY_GMAP_UC
591+
| _SEGMENT_ENTRY;
591592
} else
592593
*table = pmd_val(*pmd) &
593594
_SEGMENT_ENTRY_HARDWARE_BITS;
@@ -2396,7 +2397,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
23962397
gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
23972398
pmdp_notify_gmap(gmap, pmdp, gaddr);
23982399
WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
2399-
_SEGMENT_ENTRY_GMAP_UC));
2400+
_SEGMENT_ENTRY_GMAP_UC |
2401+
_SEGMENT_ENTRY));
24002402
if (purge)
24012403
__pmdp_csp(pmdp);
24022404
set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
@@ -2450,7 +2452,8 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
24502452
gaddr = __gmap_segment_gaddr(entry);
24512453
pmdp_notify_gmap(gmap, pmdp, gaddr);
24522454
WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
2453-
_SEGMENT_ENTRY_GMAP_UC));
2455+
_SEGMENT_ENTRY_GMAP_UC |
2456+
_SEGMENT_ENTRY));
24542457
if (MACHINE_HAS_TLB_GUEST)
24552458
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
24562459
gmap->asce, IDTE_LOCAL);
@@ -2485,7 +2488,8 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
24852488
gaddr = __gmap_segment_gaddr(entry);
24862489
pmdp_notify_gmap(gmap, pmdp, gaddr);
24872490
WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
2488-
_SEGMENT_ENTRY_GMAP_UC));
2491+
_SEGMENT_ENTRY_GMAP_UC |
2492+
_SEGMENT_ENTRY));
24892493
if (MACHINE_HAS_TLB_GUEST)
24902494
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
24912495
gmap->asce, IDTE_GLOBAL);

arch/s390/mm/hugetlbpage.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ static inline unsigned long __pte_to_rste(pte_t pte)
4848
*/
4949
if (pte_present(pte)) {
5050
rste = pte_val(pte) & PAGE_MASK;
51+
rste |= _SEGMENT_ENTRY_PRESENT;
5152
rste |= move_set_bit(pte_val(pte), _PAGE_READ,
5253
_SEGMENT_ENTRY_READ);
5354
rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
@@ -223,11 +224,10 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
223224
p4dp = p4d_offset(pgdp, addr);
224225
if (p4d_present(*p4dp)) {
225226
pudp = pud_offset(p4dp, addr);
226-
if (pud_present(*pudp)) {
227-
if (pud_leaf(*pudp))
228-
return (pte_t *) pudp;
227+
if (sz == PUD_SIZE)
228+
return (pte_t *)pudp;
229+
if (pud_present(*pudp))
229230
pmdp = pmd_offset(pudp, addr);
230-
}
231231
}
232232
}
233233
return (pte_t *) pmdp;

0 commit comments

Comments
 (0)