Skip to content

Commit f934f6b

Browse files
gerald-schaefer authored and hca committed
s390/mm: Introduce region-third and segment table swap entries
Introduce region-third (PUD) and segment table (PMD) swap entries, and make hugetlbfs RSTE <-> PTE conversion code aware of them, so that they can be used for hugetlbfs PTE_MARKER entries. Future work could also build on this to enable THP_SWAP and THP_MIGRATION for s390.

Similar to PTE swap entries, bits 0-51 can be used to store the swap offset, but bits 57-61 cannot be used for swap type because that overlaps with the INVALID and TABLE TYPE bits. PMD/PUD swap entries must be invalid, and have a correct table type so that pud_folded() check still works. Bits 53-57 can be used for swap type, but those include the PROTECT bit. So unlike swap PTEs, the PROTECT bit cannot be used to mark the swap entry. Use the "Common-Segment/Region" bit 59 instead for that.

Also remove the !MACHINE_HAS_NX check in __set_huge_pte_at(). Otherwise, that would clear the _SEGMENT_ENTRY_NOEXEC bit also for swap entries, where it is used for encoding the swap type. The architecture only requires this bit to be 0 for PTEs, with !MACHINE_HAS_NX, not for segment or region-third entries. And the check is also redundant, because after __pte_to_rste() conversion, for non-swap PTEs it would only be set if it was already set in the PTE, which should never be the case for !MACHINE_HAS_NX.

This is a prerequisite for hugetlbfs PTE_MARKER support on s390, which is needed to fix a regression introduced with commit 8a13897 ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs"). That commit depends on the availability of swap entries for hugetlbfs, which were not available for s390 so far.

Reviewed-by: Alexander Gordeev <[email protected]>
Signed-off-by: Gerald Schaefer <[email protected]>
Signed-off-by: Heiko Carstens <[email protected]>
1 parent 03e6db1 commit f934f6b

File tree

2 files changed

+71
-5
lines changed

2 files changed

+71
-5
lines changed

arch/s390/include/asm/pgtable.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ static inline int is_module_addr(void *addr)
286286
#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */
287287
#define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */
288288
#define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */
289+
#define _REGION3_ENTRY_COMM 0x0010 /* Common-Region, marks swap entry */
289290
#define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */
290291
#define _REGION3_ENTRY_WRITE 0x8000 /* SW region write bit */
291292
#define _REGION3_ENTRY_READ 0x4000 /* SW region read bit */
@@ -323,6 +324,7 @@ static inline int is_module_addr(void *addr)
323324
#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */
324325
#define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */
325326

327+
#define _SEGMENT_ENTRY_COMM 0x0010 /* Common-Segment, marks swap entry */
326328
#define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */
327329
#define _SEGMENT_ENTRY_WRITE 0x8000 /* SW segment write bit */
328330
#define _SEGMENT_ENTRY_READ 0x4000 /* SW segment read bit */
@@ -335,6 +337,10 @@ static inline int is_module_addr(void *addr)
335337

336338
#define _SEGMENT_ENTRY_PRESENT 0x0001 /* SW segment present bit */
337339

340+
/* Common bits in region and segment table entries, for swap entries */
341+
#define _RST_ENTRY_COMM 0x0010 /* Common-Region/Segment, marks swap entry */
342+
#define _RST_ENTRY_INVALID 0x0020 /* invalid region/segment table entry */
343+
338344
#define _CRST_ENTRIES 2048 /* number of region/segment table entries */
339345
#define _PAGE_ENTRIES 256 /* number of page table entries */
340346

@@ -1931,6 +1937,53 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
19311937
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
19321938
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
19331939

1940+
/*
1941+
* 64 bit swap entry format for REGION3 and SEGMENT table entries (RSTE)
1942+
* Bits 59 and 63 are used to indicate the swap entry. Bit 58 marks the rste
1943+
* as invalid.
1944+
* A swap entry is indicated by bit pattern (rste & 0x011) == 0x010
1945+
* |                       offset                       |Xtype |11TT|S0|
1946+
* |0000000000111111111122222222223333333333444444444455|555555|5566|66|
1947+
* |0123456789012345678901234567890123456789012345678901|234567|8901|23|
1948+
*
1949+
* Bits 0-51 store the offset.
1950+
* Bits 53-57 store the type.
1951+
* Bit 62 (S) is used for softdirty tracking.
1952+
* Bits 60-61 (TT) indicate the table type: 0x01 for REGION3 and 0x00 for SEGMENT.
1953+
* Bit 52 (X) is unused.
1954+
*/
1955+
1956+
#define __SWP_OFFSET_MASK_RSTE ((1UL << 52) - 1) /* swap offset field is 52 bits wide */
1957+
#define __SWP_OFFSET_SHIFT_RSTE 12 /* offset is stored above the low 12 bits */
1958+
#define __SWP_TYPE_MASK_RSTE ((1UL << 5) - 1) /* swap type field is 5 bits wide */
1959+
#define __SWP_TYPE_SHIFT_RSTE 6 /* type is stored above the low 6 bits */
1960+
1961+
/*
1962+
* TT bits set to 0x00 == SEGMENT. For REGION3 entries, caller must add R3
1963+
* bits 0x01. See also __set_huge_pte_at().
1964+
*/
1965+
static inline unsigned long mk_swap_rste(unsigned long type, unsigned long offset)
1966+
{
1967+
unsigned long rste;
1968+
1969+
rste = _RST_ENTRY_INVALID | _RST_ENTRY_COMM;
1970+
rste |= (offset & __SWP_OFFSET_MASK_RSTE) << __SWP_OFFSET_SHIFT_RSTE;
1971+
rste |= (type & __SWP_TYPE_MASK_RSTE) << __SWP_TYPE_SHIFT_RSTE;
1972+
return rste;
1973+
}
1974+
1975+
/* Extract the swap type field from an RSTE-format swap entry value. */
static inline unsigned long __swp_type_rste(swp_entry_t entry)
{
	unsigned long shifted = entry.val >> __SWP_TYPE_SHIFT_RSTE;

	/* Drop everything above the type field. */
	return shifted & __SWP_TYPE_MASK_RSTE;
}
1979+
1980+
/* Extract the swap offset field from an RSTE-format swap entry value. */
static inline unsigned long __swp_offset_rste(swp_entry_t entry)
{
	unsigned long shifted = entry.val >> __SWP_OFFSET_SHIFT_RSTE;

	/* Keep only the bits belonging to the offset field. */
	return shifted & __SWP_OFFSET_MASK_RSTE;
}
1984+
1985+
/* Wrap a raw region/segment table entry value in a swp_entry_t, unchanged. */
#define __rste_to_swp_entry(rste) ((swp_entry_t) { .val = (rste) })
1986+
19341987
extern int vmem_add_mapping(unsigned long start, unsigned long size);
19351988
extern void vmem_remove_mapping(unsigned long start, unsigned long size);
19361989
extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);

arch/s390/mm/hugetlbpage.c

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
static inline unsigned long __pte_to_rste(pte_t pte)
2626
{
27+
swp_entry_t arch_entry;
2728
unsigned long rste;
2829

2930
/*
@@ -67,20 +68,29 @@ static inline unsigned long __pte_to_rste(pte_t pte)
6768
#endif
6869
rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
6970
_SEGMENT_ENTRY_NOEXEC);
71+
} else if (!pte_none(pte)) {
72+
/* swap pte */
73+
arch_entry = __pte_to_swp_entry(pte);
74+
rste = mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry));
7075
} else
7176
rste = _SEGMENT_ENTRY_EMPTY;
7277
return rste;
7378
}
7479

7580
static inline pte_t __rste_to_pte(unsigned long rste)
7681
{
82+
swp_entry_t arch_entry;
7783
unsigned long pteval;
78-
int present;
84+
int present, none;
85+
pte_t pte;
7986

80-
if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
87+
if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
8188
present = pud_present(__pud(rste));
82-
else
89+
none = pud_none(__pud(rste));
90+
} else {
8391
present = pmd_present(__pmd(rste));
92+
none = pmd_none(__pmd(rste));
93+
}
8494

8595
/*
8696
* Convert encoding pmd / pud bits pte bits
@@ -115,6 +125,11 @@ static inline pte_t __rste_to_pte(unsigned long rste)
115125
pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY);
116126
#endif
117127
pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC);
128+
} else if (!none) {
129+
/* swap rste */
130+
arch_entry = __rste_to_swp_entry(rste);
131+
pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry));
132+
pteval = pte_val(pte);
118133
} else
119134
pteval = _PAGE_INVALID;
120135
return __pte(pteval);
@@ -149,8 +164,6 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
149164
unsigned long rste;
150165

151166
rste = __pte_to_rste(pte);
152-
if (!MACHINE_HAS_NX)
153-
rste &= ~_SEGMENT_ENTRY_NOEXEC;
154167

155168
/* Set correct table type for 2G hugepages */
156169
if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {

0 commit comments

Comments
 (0)