|
56 | 56 | #include <linux/kfifo.h>
|
57 | 57 | #include <linux/ratelimit.h>
|
58 | 58 | #include <linux/page-isolation.h>
|
| 59 | +#include <linux/pagewalk.h> |
59 | 60 | #include "internal.h"
|
60 | 61 | #include "ras/ras_event.h"
|
61 | 62 |
|
@@ -554,6 +555,148 @@ static void collect_procs(struct page *page, struct list_head *tokill,
|
554 | 555 | collect_procs_file(page, tokill, force_early);
|
555 | 556 | }
|
556 | 557 |
|
| 558 | +struct hwp_walk { |
| 559 | + struct to_kill tk; |
| 560 | + unsigned long pfn; |
| 561 | + int flags; |
| 562 | +}; |
| 563 | + |
| 564 | +static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift) |
| 565 | +{ |
| 566 | + tk->addr = addr; |
| 567 | + tk->size_shift = shift; |
| 568 | +} |
| 569 | + |
| 570 | +static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift, |
| 571 | + unsigned long poisoned_pfn, struct to_kill *tk) |
| 572 | +{ |
| 573 | + unsigned long pfn = 0; |
| 574 | + |
| 575 | + if (pte_present(pte)) { |
| 576 | + pfn = pte_pfn(pte); |
| 577 | + } else { |
| 578 | + swp_entry_t swp = pte_to_swp_entry(pte); |
| 579 | + |
| 580 | + if (is_hwpoison_entry(swp)) |
| 581 | + pfn = hwpoison_entry_to_pfn(swp); |
| 582 | + } |
| 583 | + |
| 584 | + if (!pfn || pfn != poisoned_pfn) |
| 585 | + return 0; |
| 586 | + |
| 587 | + set_to_kill(tk, addr, shift); |
| 588 | + return 1; |
| 589 | +} |
| 590 | + |
| 591 | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
| 592 | +static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, |
| 593 | + struct hwp_walk *hwp) |
| 594 | +{ |
| 595 | + pmd_t pmd = *pmdp; |
| 596 | + unsigned long pfn; |
| 597 | + unsigned long hwpoison_vaddr; |
| 598 | + |
| 599 | + if (!pmd_present(pmd)) |
| 600 | + return 0; |
| 601 | + pfn = pmd_pfn(pmd); |
| 602 | + if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) { |
| 603 | + hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT); |
| 604 | + set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT); |
| 605 | + return 1; |
| 606 | + } |
| 607 | + return 0; |
| 608 | +} |
| 609 | +#else |
| 610 | +static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, |
| 611 | + struct hwp_walk *hwp) |
| 612 | +{ |
| 613 | + return 0; |
| 614 | +} |
| 615 | +#endif |
| 616 | + |
| 617 | +static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr, |
| 618 | + unsigned long end, struct mm_walk *walk) |
| 619 | +{ |
| 620 | + struct hwp_walk *hwp = (struct hwp_walk *)walk->private; |
| 621 | + int ret = 0; |
| 622 | + pte_t *ptep; |
| 623 | + spinlock_t *ptl; |
| 624 | + |
| 625 | + ptl = pmd_trans_huge_lock(pmdp, walk->vma); |
| 626 | + if (ptl) { |
| 627 | + ret = check_hwpoisoned_pmd_entry(pmdp, addr, hwp); |
| 628 | + spin_unlock(ptl); |
| 629 | + goto out; |
| 630 | + } |
| 631 | + |
| 632 | + if (pmd_trans_unstable(pmdp)) |
| 633 | + goto out; |
| 634 | + |
| 635 | + ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl); |
| 636 | + for (; addr != end; ptep++, addr += PAGE_SIZE) { |
| 637 | + ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT, |
| 638 | + hwp->pfn, &hwp->tk); |
| 639 | + if (ret == 1) |
| 640 | + break; |
| 641 | + } |
| 642 | + pte_unmap_unlock(ptep - 1, ptl); |
| 643 | +out: |
| 644 | + cond_resched(); |
| 645 | + return ret; |
| 646 | +} |
| 647 | + |
#ifdef CONFIG_HUGETLB_PAGE
/*
 * hugetlb_entry callback of the hwpoison page walk.
 *
 * Reads the huge PTE at @ptep and checks it against hwp->pfn, using the
 * huge page shift of the VMA's hstate as the mapping shift. Returns 1 on
 * a match (hwp->tk is filled in), 0 otherwise. @hmask and @end are unused.
 */
static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
				  unsigned long addr, unsigned long end,
				  struct mm_walk *walk)
{
	struct hwp_walk *hwp = walk->private;
	pte_t entry = huge_ptep_get(ptep);
	struct hstate *hs = hstate_vma(walk->vma);
	int found;

	found = check_hwpoisoned_entry(entry, addr, huge_page_shift(hs),
				       hwp->pfn, &hwp->tk);
	return found;
}
#else
/* No hugetlb support: leave the hugetlb_entry callback unset. */
#define hwpoison_hugetlb_range	NULL
#endif
| 663 | + |
| 664 | +static struct mm_walk_ops hwp_walk_ops = { |
| 665 | + .pmd_entry = hwpoison_pte_range, |
| 666 | + .hugetlb_entry = hwpoison_hugetlb_range, |
| 667 | +}; |
| 668 | + |
| 669 | +/* |
| 670 | + * Sends SIGBUS to the current process with error info. |
| 671 | + * |
| 672 | + * This function is intended to handle "Action Required" MCEs on already |
| 673 | + * hardware poisoned pages. They could happen, for example, when |
| 674 | + * memory_failure() failed to unmap the error page at the first call, or |
| 675 | + * when multiple local machine checks happened on different CPUs. |
| 676 | + * |
| 677 | + * MCE handler currently has no easy access to the error virtual address, |
| 678 | + * so this function walks page table to find it. The returned virtual address |
| 679 | + * is proper in most cases, but it could be wrong when the application |
| 680 | + * process has multiple entries mapping the error page. |
| 681 | + */ |
| 682 | +static int kill_accessing_process(struct task_struct *p, unsigned long pfn, |
| 683 | + int flags) |
| 684 | +{ |
| 685 | + int ret; |
| 686 | + struct hwp_walk priv = { |
| 687 | + .pfn = pfn, |
| 688 | + }; |
| 689 | + priv.tk.tsk = p; |
| 690 | + |
| 691 | + mmap_read_lock(p->mm); |
| 692 | + ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops, |
| 693 | + (void *)&priv); |
| 694 | + if (ret == 1 && priv.tk.addr) |
| 695 | + kill_proc(&priv.tk, pfn, flags); |
| 696 | + mmap_read_unlock(p->mm); |
| 697 | + return ret ? -EFAULT : -EHWPOISON; |
| 698 | +} |
| 699 | + |
557 | 700 | static const char *action_name[] = {
|
558 | 701 | [MF_IGNORED] = "Ignored",
|
559 | 702 | [MF_FAILED] = "Failed",
|
@@ -1267,7 +1410,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
|
1267 | 1410 | if (TestSetPageHWPoison(head)) {
|
1268 | 1411 | pr_err("Memory failure: %#lx: already hardware poisoned\n",
|
1269 | 1412 | pfn);
|
1270 |
| - return -EHWPOISON; |
| 1413 | + res = -EHWPOISON; |
| 1414 | + if (flags & MF_ACTION_REQUIRED) |
| 1415 | + res = kill_accessing_process(current, page_to_pfn(head), flags); |
| 1416 | + return res; |
1271 | 1417 | }
|
1272 | 1418 |
|
1273 | 1419 | num_poisoned_pages_inc();
|
@@ -1476,6 +1622,8 @@ int memory_failure(unsigned long pfn, int flags)
|
1476 | 1622 | pr_err("Memory failure: %#lx: already hardware poisoned\n",
|
1477 | 1623 | pfn);
|
1478 | 1624 | res = -EHWPOISON;
|
| 1625 | + if (flags & MF_ACTION_REQUIRED) |
| 1626 | + res = kill_accessing_process(current, pfn, flags); |
1479 | 1627 | goto unlock_mutex;
|
1480 | 1628 | }
|
1481 | 1629 |
|
|
0 commit comments