
Commit deacdb3

Merge tag 'powerpc-5.8-8' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc fix from Michael Ellerman:

 "Fix a bug introduced by the changes we made to lockless page table
  walking this cycle.

  When using the hash MMU, and perf with callchain recording, we can
  deadlock if the PMI interrupts a hash fault, and the callchain
  recording then takes a hash fault on the same page.

  Thanks to Nicholas Piggin, Aneesh Kumar K.V, Anton Blanchard, and
  Athira Rajeev"

* tag 'powerpc-5.8-8' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/64s/hash: Fix hash_preload running with interrupts enabled
2 parents 14aab7e + 909adfc commit deacdb3

File tree

3 files changed: +42 -3 lines changed

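The failure mode the commit message describes is effectively a livelock: the PMI's callchain walk keeps retrying a hash fault on a page whose PTE the interrupted context has marked H_PAGE_BUSY, and that context cannot clear the bit until the PMI returns. A minimal userspace C sketch of the pattern (illustrative only; every name and the flag value below are stand-ins, not kernel code):

/* livelock_sketch.c -- illustration only, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define H_PAGE_BUSY (1UL << 0)          /* placeholder bit value, assumed */

/* Stand-in for the hash fault path: bail out if another context is
 * mid-update on this PTE, and ask the caller to retry the access. */
static bool try_insert_hpte(const unsigned long *pte)
{
        if (*pte & H_PAGE_BUSY)
                return false;
        /* ... insert the HPTE ... */
        return true;
}

/* Stand-in for the PMI-time callchain read.  If the interrupted context
 * set H_PAGE_BUSY and cannot run again until this handler returns, the
 * retry loop below never terminates: a single-CPU livelock. */
static void pmi_callchain_read(const unsigned long *pte)
{
        while (!try_insert_hpte(pte))
                ;                       /* retry the faulting access */
}

int main(void)
{
        unsigned long idle_pte = 0;
        unsigned long busy_pte = H_PAGE_BUSY;

        pmi_callchain_read(&idle_pte);  /* returns immediately */
        printf("idle PTE: progress\n");

        (void)busy_pte;                 /* pmi_callchain_read(&busy_pte)
                                         * would spin forever */
        return 0;
}

The diffs below break the cycle from both ends: do_hash_page refuses to call hash_page in "NMI" context, hash_preload() only holds H_PAGE_BUSY with interrupts disabled, and perf interrupts taken in soft-masked regions are treated as NMIs.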

arch/powerpc/kernel/exceptions-64s.S

Lines changed: 11 additions & 3 deletions
@@ -3072,10 +3072,18 @@ do_hash_page:
 	ori	r0,r0,DSISR_BAD_FAULT_64S@l
 	and.	r0,r5,r0		/* weird error? */
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
+
+	/*
+	 * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
+	 * don't call hash_page, just fail the fault. This is required to
+	 * prevent re-entrancy problems in the hash code, namely perf
+	 * interrupts hitting while something holds H_PAGE_BUSY, and taking a
+	 * hash fault. See the comment in hash_preload().
+	 */
 	ld	r11, PACA_THREAD_INFO(r13)
-	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
-	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
-	bne	77f			/* then don't call hash_page now */
+	lwz	r0,TI_PREEMPT(r11)
+	andis.	r0,r0,NMI_MASK@h
+	bne	77f
 
 	/*
 	 * r3 contains the trap number
arch/powerpc/mm/book3s64/hash_utils.c

Lines changed: 25 additions & 0 deletions
@@ -1559,6 +1559,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
 	pgd_t *pgdir;
 	int rc, ssize, update_flags = 0;
 	unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
+	unsigned long flags;
 
 	BUG_ON(get_region_id(ea) != USER_REGION_ID);
 
@@ -1592,6 +1593,28 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
 		return;
 #endif /* CONFIG_PPC_64K_PAGES */
 
+	/*
+	 * __hash_page_* must run with interrupts off, as it sets the
+	 * H_PAGE_BUSY bit. It's possible for perf interrupts to hit at any
+	 * time and may take a hash fault reading the user stack, see
+	 * read_user_stack_slow() in the powerpc/perf code.
+	 *
+	 * If that takes a hash fault on the same page as we lock here, it
+	 * will bail out when seeing H_PAGE_BUSY set, and retry the access
+	 * leading to an infinite loop.
+	 *
+	 * Disabling interrupts here does not prevent perf interrupts, but it
+	 * will prevent them taking hash faults (see the NMI test in
+	 * do_hash_page), then read_user_stack's copy_from_user_nofault will
+	 * fail and perf will fall back to read_user_stack_slow(), which
+	 * walks the Linux page tables.
+	 *
+	 * Interrupts must also be off for the duration of the
+	 * mm_is_thread_local test and update, to prevent preempt running the
+	 * mm on another CPU (XXX: this may be racy vs kthread_use_mm).
+	 */
+	local_irq_save(flags);
+
 	/* Is that local to this CPU ? */
 	if (mm_is_thread_local(mm))
 		update_flags |= HPTE_LOCAL_UPDATE;
@@ -1614,6 +1637,8 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
 				   mm_ctx_user_psize(&mm->context),
 				   mm_ctx_user_psize(&mm->context),
 				   pte_val(*ptep));
+
+	local_irq_restore(flags);
 }
 
 /*
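The fallback path the new comment relies on, where copy_from_user_nofault() fails in NMI context and perf drops to a software page-table walk, looks roughly like the following kernel-context sketch. The helper name is made up; the real logic lives in read_user_stack_slow() and the callchain readers in arch/powerpc/perf/:

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/uaccess.h>      /* copy_from_user_nofault() */

/* Hypothetical helper shaped like the powerpc perf callchain readers:
 * try the nofault copy first; when that fails (e.g. the page is not yet
 * in the hash table and we are in NMI context, so no hash fault is
 * taken), fall back to walking the Linux page tables in software. */
static int read_user_word(const void __user *ptr, unsigned long *ret)
{
	if (!copy_from_user_nofault(ret, ptr, sizeof(*ret)))
		return 0;		/* fast path succeeded */

	/* Slow path: the real code calls read_user_stack_slow() here. */
	return -EFAULT;
}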

arch/powerpc/perf/core-book3s.c

Lines changed: 6 additions & 0 deletions
@@ -2179,6 +2179,12 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 
 	perf_read_regs(regs);
 
+	/*
+	 * If perf interrupts hit in a local_irq_disable (soft-masked) region,
+	 * we consider them as NMIs. This is required to prevent hash faults on
+	 * user addresses when reading callchains. See the NMI test in
+	 * do_hash_page.
+	 */
 	nmi = perf_intr_is_nmi(regs);
 	if (nmi)
 		nmi_enter();