Skip to content

Commit 024d232

Browse files
Peter Zijlstra authored and hansendc committed
mm: Fix pmd_read_atomic()
AFAICT there's no reason to do anything different than what we do for PTEs. Make it so (also affects SH). Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lkml.kernel.org/r/20221022114424.711181252%40infradead.org
1 parent 0862ff0 commit 024d232

File tree

2 files changed

+37
-66
lines changed

2 files changed

+37
-66
lines changed

arch/x86/include/asm/pgtable-3level.h

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -34,62 +34,6 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
3434
ptep->pte_low = pte.pte_low;
3535
}
3636

37-
#define pmd_read_atomic pmd_read_atomic
/*
 * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with
 * a "*pmdp" dereference done by GCC. Problem is, in certain places
 * where pte_offset_map_lock() is called, concurrent page faults are
 * allowed, if the mmap_lock is held for reading. An example is mincore
 * vs page faults vs MADV_DONTNEED. On the page fault side
 * pmd_populate() rightfully does a set_64bit(), but if we're reading the
 * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
 * because GCC will not read the 64-bit value of the pmd atomically.
 *
 * To fix this all places running pte_offset_map_lock() while holding the
 * mmap_lock in read mode, shall read the pmdp pointer using this
 * function to know if the pmd is null or not, and in turn to know if
 * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd
 * operations.
 *
 * Without THP if the mmap_lock is held for reading, the pmd can only
 * transition from null to not null while pmd_read_atomic() runs. So
 * we can always return atomic pmd values with this function.
 *
 * With THP if the mmap_lock is held for reading, the pmd can become
 * trans_huge or none or point to a pte (and in turn become "stable")
 * at any time under pmd_read_atomic(). We could read it truly
 * atomically here with an atomic64_read() for the THP enabled case (and
 * it would be a whole lot simpler), but to avoid using cmpxchg8b we
 * only return an atomic pmdval if the low part of the pmdval is later
 * found to be stable (i.e. pointing to a pte). We are also returning a
 * 'none' (zero) pmdval if the low part of the pmd is zero.
 *
 * In some cases the high and low part of the pmdval returned may not be
 * consistent if THP is enabled (the low part may point to previously
 * mapped hugepage, while the high part may point to a more recently
 * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only
 * needs the low part of the pmd to be read atomically to decide if the
 * pmd is unstable or not, with the only exception when the low part
 * of the pmd is zero, in which case we return a 'none' pmd.
 */
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
	pmdval_t ret;
	/* View the 64-bit PAE pmd as two 32-bit halves: tmp[0]=low, tmp[1]=high. */
	u32 *tmp = (u32 *)pmdp;

	ret = (pmdval_t) (*tmp);
	if (ret) {
		/*
		 * If the low part is null, we must not read the high part
		 * or we can end up with a partial pmd.
		 */
		smp_rmb();
		ret |= ((pmdval_t)*(tmp + 1)) << 32;
	}

	return (pmd_t) { .pmd = ret };
}
92-
9337
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
9438
{
9539
set_64bit((unsigned long long *)(ptep), native_pte_val(pte));

include/linux/pgtable.h

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,13 @@ static inline pte_t ptep_get(pte_t *ptep)
298298
}
299299
#endif
300300

301+
#ifndef __HAVE_ARCH_PMDP_GET
/*
 * Generic pmd accessor. READ_ONCE() forces a single, non-refetched load
 * of the whole pmd, so the compiler cannot tear or repeat the access.
 * Architectures with special requirements override this by defining
 * __HAVE_ARCH_PMDP_GET and supplying their own pmdp_get().
 */
static inline pmd_t pmdp_get(pmd_t *pmdp)
{
	return READ_ONCE(*pmdp);
}
#endif
307+
301308
#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
302309
/*
303310
* For walking the pagetables without holding any locks. Some architectures
@@ -340,15 +347,42 @@ static inline pte_t ptep_get_lockless(pte_t *ptep)
340347

341348
return pte;
342349
}
343-
#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
350+
#define ptep_get_lockless ptep_get_lockless
351+
352+
#if CONFIG_PGTABLE_LEVELS > 2
353+
static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
354+
{
355+
pmd_t pmd;
356+
357+
do {
358+
pmd.pmd_low = pmdp->pmd_low;
359+
smp_rmb();
360+
pmd.pmd_high = pmdp->pmd_high;
361+
smp_rmb();
362+
} while (unlikely(pmd.pmd_low != pmdp->pmd_low));
363+
364+
return pmd;
365+
}
366+
#define pmdp_get_lockless pmdp_get_lockless
367+
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
368+
#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
369+
344370
/*
 * We require that the PTE can be read atomically.
 */
#ifndef ptep_get_lockless
/*
 * Fallback for architectures where a pte fits in a single word: a plain
 * ptep_get() is already atomic, so no retry protocol is needed.
 */
static inline pte_t ptep_get_lockless(pte_t *ptep)
{
	return ptep_get(ptep);
}
#endif
379+
380+
#ifndef pmdp_get_lockless
381+
static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
382+
{
383+
return pmdp_get(pmdp);
384+
}
385+
#endif
352386

353387
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
354388
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
@@ -1318,17 +1352,10 @@ static inline int pud_trans_unstable(pud_t *pud)
13181352
#endif
13191353
}
13201354

1321-
/*
 * pmd_read_atomic(): historical name for a lockless, tear-free pmd read.
 * After this commit it simply delegates to pmdp_get_lockless(), which
 * handles both the single-word and the split low/high cases.
 * NOTE(review): kept as a wrapper, presumably so existing callers need
 * not be converted in this change — confirm before removing.
 */
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
	return pmdp_get_lockless(pmdp);
}
13321359

13331360
#ifndef arch_needs_pgtable_deposit
13341361
#define arch_needs_pgtable_deposit() (false)

0 commit comments

Comments
 (0)