@@ -34,62 +34,6 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
34
34
ptep -> pte_low = pte .pte_low ;
35
35
}
36
36
37
- #define pmd_read_atomic pmd_read_atomic
38
- /*
39
- * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with
40
- * a "*pmdp" dereference done by GCC. Problem is, in certain places
41
- * where pte_offset_map_lock() is called, concurrent page faults are
42
- * allowed, if the mmap_lock is held for reading. An example is mincore
43
- * vs page faults vs MADV_DONTNEED. On the page fault side
44
- * pmd_populate() rightfully does a set_64bit(), but if we're reading the
45
- * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
46
- * because GCC will not read the 64-bit value of the pmd atomically.
47
- *
48
- * To fix this all places running pte_offset_map_lock() while holding the
49
- * mmap_lock in read mode, shall read the pmdp pointer using this
50
- * function to know if the pmd is null or not, and in turn to know if
51
- * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd
52
- * operations.
53
- *
54
- * Without THP if the mmap_lock is held for reading, the pmd can only
55
- * transition from null to not null while pmd_read_atomic() runs. So
56
- * we can always return atomic pmd values with this function.
57
- *
58
- * With THP if the mmap_lock is held for reading, the pmd can become
59
- * trans_huge or none or point to a pte (and in turn become "stable")
60
- * at any time under pmd_read_atomic(). We could read it truly
61
- * atomically here with an atomic64_read() for the THP enabled case (and
62
- * it would be a whole lot simpler), but to avoid using cmpxchg8b we
63
- * only return an atomic pmdval if the low part of the pmdval is later
64
- * found to be stable (i.e. pointing to a pte). We are also returning a
65
- * 'none' (zero) pmdval if the low part of the pmd is zero.
66
- *
67
- * In some cases the high and low part of the pmdval returned may not be
68
- * consistent if THP is enabled (the low part may point to a previously
69
- * mapped hugepage, while the high part may point to a more recently
70
- * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only
71
- * needs the low part of the pmd to be read atomically to decide if the
72
- * pmd is unstable or not, with the only exception when the low part
73
- * of the pmd is zero, in which case we return a 'none' pmd.
74
- */
75
- static inline pmd_t pmd_read_atomic (pmd_t * pmdp ) /* Build a pmd_t from *pmdp using two separate 32-bit loads (low half first) rather than one 64-bit dereference. */
76
- {
77
- pmdval_t ret ; /* accumulates the value that is returned wrapped in a pmd_t */
78
- u32 * tmp = (u32 * )pmdp ; /* view the entry as 32-bit words: tmp[0] supplies bits 0-31, tmp[1] is shifted into bits 32-63 below */
79
-
80
- ret = (pmdval_t ) (* tmp ); /* load the low word first; it alone decides whether the high word is read at all */
81
- if (ret ) { /* low word zero: skip the high-word load and return an all-zero pmd */
82
- /*
83
- * If the low part is null, we must not read the high part
84
- * or we can end up with a partial pmd.
85
- */
86
- smp_rmb (); /* read barrier placed between the low-word load above and the high-word load below */
87
- ret |= ((pmdval_t )* (tmp + 1 )) << 32 ; /* widen to pmdval_t before shifting so the high word lands in bits 32-63 */
88
- }
89
-
90
- return (pmd_t ) { .pmd = ret }; /* compound literal: wrap the raw value in the pmd_t struct */
91
- }
92
-
93
37
static inline void native_set_pte_atomic (pte_t * ptep , pte_t pte )
94
38
{
95
39
set_64bit ((unsigned long long * )(ptep ), native_pte_val (pte ));
0 commit comments