Skip to content

Commit a3f5d80

Browse files
nhoriguchi authored and torvalds committed
mm,hwpoison: send SIGBUS with error virtual address
Now an action-required MCE on an already hwpoisoned address surely sends a SIGBUS to the current process, but the SIGBUS doesn't convey the error virtual address. That's not optimal for hwpoison-aware applications. To fix the issue, make memory_failure() call kill_accessing_process(), which does a pagetable walk to find the error virtual address. It could find multiple virtual addresses for the same error page, and it seems hard to tell which virtual address is the correct one. But that's rare and sending an incorrect virtual address could be better than no address. So let's report the first found virtual address for now. [[email protected]: fix walk_page_range() return] Link: https://lkml.kernel.org/r/[email protected] Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Naoya Horiguchi <[email protected]> Cc: Tony Luck <[email protected]> Cc: Aili Yao <[email protected]> Cc: Oscar Salvador <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Jue Wang <[email protected]> Cc: Borislav Petkov <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 203c06e commit a3f5d80

File tree

3 files changed

+165
-3
lines changed

3 files changed

+165
-3
lines changed

arch/x86/kernel/cpu/mce/core.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1257,19 +1257,28 @@ static void kill_me_maybe(struct callback_head *cb)
12571257
{
12581258
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
12591259
int flags = MF_ACTION_REQUIRED;
1260+
int ret;
12601261

12611262
pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
12621263

12631264
if (!p->mce_ripv)
12641265
flags |= MF_MUST_KILL;
12651266

1266-
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
1267-
!(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
1267+
ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
1268+
if (!ret && !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
12681269
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
12691270
sync_core();
12701271
return;
12711272
}
12721273

1274+
/*
1275+
* -EHWPOISON from memory_failure() means that it already sent SIGBUS
1276+
* to the current process with the proper error info, so no need to
1277+
* send SIGBUS here again.
1278+
*/
1279+
if (ret == -EHWPOISON)
1280+
return;
1281+
12731282
if (p->mce_vaddr != (void __user *)-1l) {
12741283
force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
12751284
} else {

include/linux/swapops.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,11 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
330330
return swp_type(entry) == SWP_HWPOISON;
331331
}
332332

333+
/*
 * Extract the page frame number carried by a hwpoison swap entry.
 * The pfn is whatever swp_offset() yields for @entry.
 */
static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry)
{
	unsigned long pfn = swp_offset(entry);

	return pfn;
}
337+
333338
static inline void num_poisoned_pages_inc(void)
334339
{
335340
atomic_long_inc(&num_poisoned_pages);

mm/memory-failure.c

Lines changed: 149 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
#include <linux/kfifo.h>
5757
#include <linux/ratelimit.h>
5858
#include <linux/page-isolation.h>
59+
#include <linux/pagewalk.h>
5960
#include "internal.h"
6061
#include "ras/ras_event.h"
6162

@@ -554,6 +555,148 @@ static void collect_procs(struct page *page, struct list_head *tokill,
554555
collect_procs_file(page, tokill, force_early);
555556
}
556557

558+
/*
 * Private state handed to the page-table walk callbacks through
 * mm_walk->private by kill_accessing_process().
 */
struct hwp_walk {
559+
struct to_kill tk;	/* filled in by set_to_kill() when a matching entry is found */
560+
unsigned long pfn;	/* pfn of the poisoned page being searched for */
561+
int flags;	/* not assigned or read by the code visible here */
562+
};
563+
564+
static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift)
565+
{
566+
tk->addr = addr;
567+
tk->size_shift = shift;
568+
}
569+
570+
/*
 * Test whether @pte refers to @poisoned_pfn, either as a present mapping or
 * as a hwpoison swap entry.
 *
 * On a match the address/shift pair is stored in @tk and 1 is returned;
 * otherwise 0 is returned and @tk is left untouched.  A pfn of 0 is never
 * reported as a match.
 */
static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
				  unsigned long poisoned_pfn, struct to_kill *tk)
{
	unsigned long mapped_pfn;

	if (pte_present(pte)) {
		mapped_pfn = pte_pfn(pte);
	} else {
		swp_entry_t entry = pte_to_swp_entry(pte);

		/* Only hwpoison entries carry a pfn we can compare against. */
		if (!is_hwpoison_entry(entry))
			return 0;
		mapped_pfn = hwpoison_entry_to_pfn(entry);
	}

	if (mapped_pfn == 0)
		return 0;
	if (mapped_pfn != poisoned_pfn)
		return 0;

	set_to_kill(tk, addr, shift);
	return 1;
}
590+
591+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
592+
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
593+
struct hwp_walk *hwp)
594+
{
595+
pmd_t pmd = *pmdp;
596+
unsigned long pfn;
597+
unsigned long hwpoison_vaddr;
598+
599+
if (!pmd_present(pmd))
600+
return 0;
601+
pfn = pmd_pfn(pmd);
602+
if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) {
603+
hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT);
604+
set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT);
605+
return 1;
606+
}
607+
return 0;
608+
}
609+
#else
610+
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
611+
struct hwp_walk *hwp)
612+
{
613+
return 0;
614+
}
615+
#endif
616+
617+
/*
 * pmd_entry callback for the hwpoison page-table walk.
 *
 * Searches the range [@addr, @end) below @pmdp for a mapping of the poisoned
 * pfn stored in the struct hwp_walk passed via walk->private.  A huge pmd is
 * checked as a whole under its pmd lock; otherwise every pte in the range is
 * inspected under the pte lock.
 *
 * Returns 1 once a matching entry has been found (the address is recorded in
 * hwp->tk by the check helpers), 0 otherwise.
 */
static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
			      unsigned long end, struct mm_walk *walk)
{
	struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
	int ret = 0;
	pte_t *ptep, *mapped_pte;
	spinlock_t *ptl;

	ptl = pmd_trans_huge_lock(pmdp, walk->vma);
	if (ptl) {
		ret = check_hwpoisoned_pmd_entry(pmdp, addr, hwp);
		spin_unlock(ptl);
		goto out;
	}

	if (pmd_trans_unstable(pmdp))
		goto out;

	mapped_pte = ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp,
						addr, &ptl);
	for (; addr != end; ptep++, addr += PAGE_SIZE) {
		ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT,
					     hwp->pfn, &hwp->tk);
		if (ret == 1)
			break;
	}
	/*
	 * Unmap through the pointer pte_offset_map_lock() returned.  When the
	 * loop breaks early ptep has not been advanced past the matching
	 * entry, so "ptep - 1" would name the entry before it - and, on a
	 * first-iteration match, an address before the mapped pte.
	 */
	pte_unmap_unlock(mapped_pte, ptl);
out:
	cond_resched();
	return ret;
}
647+
648+
#ifdef CONFIG_HUGETLB_PAGE
/*
 * hugetlb_entry callback for the hwpoison page-table walk.
 *
 * Reads the huge pte at @ptep and checks it against the poisoned pfn held in
 * the struct hwp_walk passed via walk->private, using the huge page shift of
 * the vma's hstate as the mapping size.  @hmask and @end are unused.
 *
 * Returns the result of check_hwpoisoned_entry(): 1 on a match, 0 otherwise.
 */
static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
				  unsigned long addr, unsigned long end,
				  struct mm_walk *walk)
{
	struct hwp_walk *state = (struct hwp_walk *)walk->private;
	pte_t entry = huge_ptep_get(ptep);
	struct hstate *hs = hstate_vma(walk->vma);
	unsigned long target_pfn = state->pfn;

	return check_hwpoisoned_entry(entry, addr, huge_page_shift(hs),
				      target_pfn, &state->tk);
}
#else
/* No hugetlb support: leave the hugetlb_entry callback unset. */
#define hwpoison_hugetlb_range	NULL
#endif
663+
664+
static struct mm_walk_ops hwp_walk_ops = {
665+
.pmd_entry = hwpoison_pte_range,
666+
.hugetlb_entry = hwpoison_hugetlb_range,
667+
};
668+
669+
/*
670+
* Sends SIGBUS to the current process with error info.
671+
*
672+
* This function is intended to handle "Action Required" MCEs on already
673+
* hardware poisoned pages. They could happen, for example, when
674+
* memory_failure() failed to unmap the error page at the first call, or
675+
* when multiple local machine checks happened on different CPUs.
676+
*
677+
* MCE handler currently has no easy access to the error virtual address,
678+
* so this function walks page table to find it. The returned virtual address
679+
* is proper in most cases, but it could be wrong when the application
680+
* process has multiple entries mapping the error page.
681+
*/
682+
static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
683+
int flags)
684+
{
685+
int ret;
686+
struct hwp_walk priv = {
687+
.pfn = pfn,
688+
};
689+
priv.tk.tsk = p;
690+
691+
mmap_read_lock(p->mm);
692+
ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
693+
(void *)&priv);
694+
if (ret == 1 && priv.tk.addr)
695+
kill_proc(&priv.tk, pfn, flags);
696+
mmap_read_unlock(p->mm);
697+
return ret ? -EFAULT : -EHWPOISON;
698+
}
699+
557700
static const char *action_name[] = {
558701
[MF_IGNORED] = "Ignored",
559702
[MF_FAILED] = "Failed",
@@ -1267,7 +1410,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
12671410
if (TestSetPageHWPoison(head)) {
12681411
pr_err("Memory failure: %#lx: already hardware poisoned\n",
12691412
pfn);
1270-
return -EHWPOISON;
1413+
res = -EHWPOISON;
1414+
if (flags & MF_ACTION_REQUIRED)
1415+
res = kill_accessing_process(current, page_to_pfn(head), flags);
1416+
return res;
12711417
}
12721418

12731419
num_poisoned_pages_inc();
@@ -1476,6 +1622,8 @@ int memory_failure(unsigned long pfn, int flags)
14761622
pr_err("Memory failure: %#lx: already hardware poisoned\n",
14771623
pfn);
14781624
res = -EHWPOISON;
1625+
if (flags & MF_ACTION_REQUIRED)
1626+
res = kill_accessing_process(current, pfn, flags);
14791627
goto unlock_mutex;
14801628
}
14811629

0 commit comments

Comments
 (0)