Skip to content

Commit 0ed950d

Browse files
nhoriguchitorvalds
authored andcommitted
mm,hwpoison: make get_hwpoison_page() call get_any_page()
__get_hwpoison_page() could fail to grab refcount by some race condition, so it's helpful if we can handle it by retrying. We already have retry logic, so make get_hwpoison_page() call get_any_page() when called from memory_failure(). As a result, get_hwpoison_page() can return negative values (i.e. error code), so some callers are also changed to handle error cases. soft_offline_page() does nothing for -EBUSY because that's enough and users in userspace can easily handle it. unpoison_memory() is also unchanged because it's broken and need thorough fixes (will be done later). Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Naoya Horiguchi <[email protected]> Cc: Oscar Salvador <[email protected]> Cc: Muchun Song <[email protected]> Cc: Mike Kravetz <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Tony Luck <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent a3f5d80 commit 0ed950d

File tree

2 files changed

+111
-85
lines changed

2 files changed

+111
-85
lines changed

mm/hugetlb.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5938,6 +5938,8 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
59385938
*hugetlb = true;
59395939
if (HPageFreed(page) || HPageMigratable(page))
59405940
ret = get_page_unless_zero(page);
5941+
else
5942+
ret = -EBUSY;
59415943
}
59425944
spin_unlock_irq(&hugetlb_lock);
59435945
return ret;

mm/memory-failure.c

Lines changed: 109 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,13 +1117,6 @@ static inline bool HWPoisonHandlable(struct page *page)
11171117
return PageLRU(page) || __PageMovable(page);
11181118
}
11191119

1120-
/**
1121-
* __get_hwpoison_page() - Get refcount for memory error handling:
1122-
* @page: raw error page (hit by memory error)
1123-
*
1124-
* Return: return 0 if failed to grab the refcount, otherwise true (some
1125-
* non-zero value.)
1126-
*/
11271120
static int __get_hwpoison_page(struct page *page)
11281121
{
11291122
struct page *head = compound_head(page);
@@ -1168,15 +1161,6 @@ static int __get_hwpoison_page(struct page *page)
11681161
return 0;
11691162
}
11701163

1171-
/*
1172-
* Safely get reference count of an arbitrary page.
1173-
*
1174-
* Returns 0 for a free page, 1 for an in-use page,
1175-
* -EIO for a page-type we cannot handle and -EBUSY if we raced with an
1176-
* allocation.
1177-
* We only incremented refcount in case the page was already in-use and it
1178-
* is a known type we can handle.
1179-
*/
11801164
static int get_any_page(struct page *p, unsigned long flags)
11811165
{
11821166
int ret = 0, pass = 0;
@@ -1186,50 +1170,77 @@ static int get_any_page(struct page *p, unsigned long flags)
11861170
count_increased = true;
11871171

11881172
try_again:
1189-
if (!count_increased && !__get_hwpoison_page(p)) {
1190-
if (page_count(p)) {
1191-
/* We raced with an allocation, retry. */
1192-
if (pass++ < 3)
1193-
goto try_again;
1194-
ret = -EBUSY;
1195-
} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
1196-
/* We raced with put_page, retry. */
1173+
if (!count_increased) {
1174+
ret = __get_hwpoison_page(p);
1175+
if (!ret) {
1176+
if (page_count(p)) {
1177+
/* We raced with an allocation, retry. */
1178+
if (pass++ < 3)
1179+
goto try_again;
1180+
ret = -EBUSY;
1181+
} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
1182+
/* We raced with put_page, retry. */
1183+
if (pass++ < 3)
1184+
goto try_again;
1185+
ret = -EIO;
1186+
}
1187+
goto out;
1188+
} else if (ret == -EBUSY) {
1189+
/* We raced with freeing huge page to buddy, retry. */
11971190
if (pass++ < 3)
11981191
goto try_again;
1199-
ret = -EIO;
1192+
goto out;
12001193
}
1194+
}
1195+
1196+
if (PageHuge(p) || HWPoisonHandlable(p)) {
1197+
ret = 1;
12011198
} else {
1202-
if (PageHuge(p) || HWPoisonHandlable(p)) {
1203-
ret = 1;
1204-
} else {
1205-
/*
1206-
* A page we cannot handle. Check whether we can turn
1207-
* it into something we can handle.
1208-
*/
1209-
if (pass++ < 3) {
1210-
put_page(p);
1211-
shake_page(p, 1);
1212-
count_increased = false;
1213-
goto try_again;
1214-
}
1199+
/*
1200+
* A page we cannot handle. Check whether we can turn
1201+
* it into something we can handle.
1202+
*/
1203+
if (pass++ < 3) {
12151204
put_page(p);
1216-
ret = -EIO;
1205+
shake_page(p, 1);
1206+
count_increased = false;
1207+
goto try_again;
12171208
}
1209+
put_page(p);
1210+
ret = -EIO;
12181211
}
1219-
1212+
out:
12201213
return ret;
12211214
}
12221215

1223-
static int get_hwpoison_page(struct page *p, unsigned long flags,
1224-
enum mf_flags ctxt)
1216+
/**
1217+
* get_hwpoison_page() - Get refcount for memory error handling
1218+
* @p: Raw error page (hit by memory error)
1219+
* @flags: Flags controlling behavior of error handling
1220+
*
1221+
* get_hwpoison_page() takes a page refcount of an error page to handle memory
1222+
* error on it, after checking that the error page is in a well-defined state
1223+
* (defined as a page-type we can successfully handle the memor error on it,
1224+
* such as LRU page and hugetlb page).
1225+
*
1226+
* Memory error handling could be triggered at any time on any type of page,
1227+
* so it's prone to race with typical memory management lifecycle (like
1228+
* allocation and free). So to avoid such races, get_hwpoison_page() takes
1229+
* extra care for the error page's state (as done in __get_hwpoison_page()),
1230+
* and has some retry logic in get_any_page().
1231+
*
1232+
* Return: 0 on failure,
1233+
* 1 on success for in-use pages in a well-defined state,
1234+
* -EIO for pages on which we can not handle memory errors,
1235+
* -EBUSY when get_hwpoison_page() has raced with page lifecycle
1236+
* operations like allocation and free.
1237+
*/
1238+
static int get_hwpoison_page(struct page *p, unsigned long flags)
12251239
{
12261240
int ret;
12271241

12281242
zone_pcp_disable(page_zone(p));
1229-
if (ctxt == MF_SOFT_OFFLINE)
1230-
ret = get_any_page(p, flags);
1231-
else
1232-
ret = __get_hwpoison_page(p);
1243+
ret = get_any_page(p, flags);
12331244
zone_pcp_enable(page_zone(p));
12341245

12351246
return ret;
@@ -1418,27 +1429,33 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
14181429

14191430
num_poisoned_pages_inc();
14201431

1421-
if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
1422-
/*
1423-
* Check "filter hit" and "race with other subpage."
1424-
*/
1425-
lock_page(head);
1426-
if (PageHWPoison(head)) {
1427-
if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
1428-
|| (p != head && TestSetPageHWPoison(head))) {
1429-
num_poisoned_pages_dec();
1430-
unlock_page(head);
1431-
return 0;
1432+
if (!(flags & MF_COUNT_INCREASED)) {
1433+
res = get_hwpoison_page(p, flags);
1434+
if (!res) {
1435+
/*
1436+
* Check "filter hit" and "race with other subpage."
1437+
*/
1438+
lock_page(head);
1439+
if (PageHWPoison(head)) {
1440+
if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
1441+
|| (p != head && TestSetPageHWPoison(head))) {
1442+
num_poisoned_pages_dec();
1443+
unlock_page(head);
1444+
return 0;
1445+
}
14321446
}
1447+
unlock_page(head);
1448+
res = MF_FAILED;
1449+
if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) {
1450+
page_ref_inc(p);
1451+
res = MF_RECOVERED;
1452+
}
1453+
action_result(pfn, MF_MSG_FREE_HUGE, res);
1454+
return res == MF_RECOVERED ? 0 : -EBUSY;
1455+
} else if (res < 0) {
1456+
action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
1457+
return -EBUSY;
14331458
}
1434-
unlock_page(head);
1435-
res = MF_FAILED;
1436-
if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) {
1437-
page_ref_inc(p);
1438-
res = MF_RECOVERED;
1439-
}
1440-
action_result(pfn, MF_MSG_FREE_HUGE, res);
1441-
return res == MF_RECOVERED ? 0 : -EBUSY;
14421459
}
14431460

14441461
lock_page(head);
@@ -1641,28 +1658,35 @@ int memory_failure(unsigned long pfn, int flags)
16411658
* In fact it's dangerous to directly bump up page count from 0,
16421659
* that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
16431660
*/
1644-
if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
1645-
if (is_free_buddy_page(p)) {
1646-
if (take_page_off_buddy(p)) {
1647-
page_ref_inc(p);
1648-
res = MF_RECOVERED;
1649-
} else {
1650-
/* We lost the race, try again */
1651-
if (retry) {
1652-
ClearPageHWPoison(p);
1653-
num_poisoned_pages_dec();
1654-
retry = false;
1655-
goto try_again;
1661+
if (!(flags & MF_COUNT_INCREASED)) {
1662+
res = get_hwpoison_page(p, flags);
1663+
if (!res) {
1664+
if (is_free_buddy_page(p)) {
1665+
if (take_page_off_buddy(p)) {
1666+
page_ref_inc(p);
1667+
res = MF_RECOVERED;
1668+
} else {
1669+
/* We lost the race, try again */
1670+
if (retry) {
1671+
ClearPageHWPoison(p);
1672+
num_poisoned_pages_dec();
1673+
retry = false;
1674+
goto try_again;
1675+
}
1676+
res = MF_FAILED;
16561677
}
1657-
res = MF_FAILED;
1678+
action_result(pfn, MF_MSG_BUDDY, res);
1679+
res = res == MF_RECOVERED ? 0 : -EBUSY;
1680+
} else {
1681+
action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
1682+
res = -EBUSY;
16581683
}
1659-
action_result(pfn, MF_MSG_BUDDY, res);
1660-
res = res == MF_RECOVERED ? 0 : -EBUSY;
1661-
} else {
1662-
action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
1684+
goto unlock_mutex;
1685+
} else if (res < 0) {
1686+
action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
16631687
res = -EBUSY;
1688+
goto unlock_mutex;
16641689
}
1665-
goto unlock_mutex;
16661690
}
16671691

16681692
if (PageTransHuge(hpage)) {
@@ -1940,7 +1964,7 @@ int unpoison_memory(unsigned long pfn)
19401964
return 0;
19411965
}
19421966

1943-
if (!get_hwpoison_page(p, flags, 0)) {
1967+
if (!get_hwpoison_page(p, flags)) {
19441968
if (TestClearPageHWPoison(p))
19451969
num_poisoned_pages_dec();
19461970
unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
@@ -2156,7 +2180,7 @@ int soft_offline_page(unsigned long pfn, int flags)
21562180

21572181
retry:
21582182
get_online_mems();
2159-
ret = get_hwpoison_page(page, flags, MF_SOFT_OFFLINE);
2183+
ret = get_hwpoison_page(page, flags);
21602184
put_online_mems();
21612185

21622186
if (ret > 0) {

0 commit comments

Comments
 (0)