@@ -1117,13 +1117,6 @@ static inline bool HWPoisonHandlable(struct page *page)
1117
1117
return PageLRU (page ) || __PageMovable (page );
1118
1118
}
1119
1119
1120
- /**
1121
- * __get_hwpoison_page() - Get refcount for memory error handling:
1122
- * @page: raw error page (hit by memory error)
1123
- *
1124
- * Return: return 0 if failed to grab the refcount, otherwise true (some
1125
- * non-zero value.)
1126
- */
1127
1120
static int __get_hwpoison_page (struct page * page )
1128
1121
{
1129
1122
struct page * head = compound_head (page );
@@ -1168,15 +1161,6 @@ static int __get_hwpoison_page(struct page *page)
1168
1161
return 0 ;
1169
1162
}
1170
1163
1171
- /*
1172
- * Safely get reference count of an arbitrary page.
1173
- *
1174
- * Returns 0 for a free page, 1 for an in-use page,
1175
- * -EIO for a page-type we cannot handle and -EBUSY if we raced with an
1176
- * allocation.
1177
- * We only incremented refcount in case the page was already in-use and it
1178
- * is a known type we can handle.
1179
- */
1180
1164
static int get_any_page (struct page * p , unsigned long flags )
1181
1165
{
1182
1166
int ret = 0 , pass = 0 ;
@@ -1186,50 +1170,77 @@ static int get_any_page(struct page *p, unsigned long flags)
1186
1170
count_increased = true;
1187
1171
1188
1172
try_again :
1189
- if (!count_increased && !__get_hwpoison_page (p )) {
1190
- if (page_count (p )) {
1191
- /* We raced with an allocation, retry. */
1192
- if (pass ++ < 3 )
1193
- goto try_again ;
1194
- ret = - EBUSY ;
1195
- } else if (!PageHuge (p ) && !is_free_buddy_page (p )) {
1196
- /* We raced with put_page, retry. */
1173
+ if (!count_increased ) {
1174
+ ret = __get_hwpoison_page (p );
1175
+ if (!ret ) {
1176
+ if (page_count (p )) {
1177
+ /* We raced with an allocation, retry. */
1178
+ if (pass ++ < 3 )
1179
+ goto try_again ;
1180
+ ret = - EBUSY ;
1181
+ } else if (!PageHuge (p ) && !is_free_buddy_page (p )) {
1182
+ /* We raced with put_page, retry. */
1183
+ if (pass ++ < 3 )
1184
+ goto try_again ;
1185
+ ret = - EIO ;
1186
+ }
1187
+ goto out ;
1188
+ } else if (ret == - EBUSY ) {
1189
+ /* We raced with freeing huge page to buddy, retry. */
1197
1190
if (pass ++ < 3 )
1198
1191
goto try_again ;
1199
- ret = - EIO ;
1192
+ goto out ;
1200
1193
}
1194
+ }
1195
+
1196
+ if (PageHuge (p ) || HWPoisonHandlable (p )) {
1197
+ ret = 1 ;
1201
1198
} else {
1202
- if (PageHuge (p ) || HWPoisonHandlable (p )) {
1203
- ret = 1 ;
1204
- } else {
1205
- /*
1206
- * A page we cannot handle. Check whether we can turn
1207
- * it into something we can handle.
1208
- */
1209
- if (pass ++ < 3 ) {
1210
- put_page (p );
1211
- shake_page (p , 1 );
1212
- count_increased = false;
1213
- goto try_again ;
1214
- }
1199
+ /*
1200
+ * A page we cannot handle. Check whether we can turn
1201
+ * it into something we can handle.
1202
+ */
1203
+ if (pass ++ < 3 ) {
1215
1204
put_page (p );
1216
- ret = - EIO ;
1205
+ shake_page (p , 1 );
1206
+ count_increased = false;
1207
+ goto try_again ;
1217
1208
}
1209
+ put_page (p );
1210
+ ret = - EIO ;
1218
1211
}
1219
-
1212
+ out :
1220
1213
return ret ;
1221
1214
}
1222
1215
1223
- static int get_hwpoison_page (struct page * p , unsigned long flags ,
1224
- enum mf_flags ctxt )
1216
+ /**
1217
+ * get_hwpoison_page() - Get refcount for memory error handling
1218
+ * @p: Raw error page (hit by memory error)
1219
+ * @flags: Flags controlling behavior of error handling
1220
+ *
1221
+ * get_hwpoison_page() takes a page refcount of an error page to handle memory
1222
+ * error on it, after checking that the error page is in a well-defined state
1223
+ * (defined as a page-type we can successfully handle the memor error on it,
1224
+ * such as LRU page and hugetlb page).
1225
+ *
1226
+ * Memory error handling could be triggered at any time on any type of page,
1227
+ * so it's prone to race with typical memory management lifecycle (like
1228
+ * allocation and free). So to avoid such races, get_hwpoison_page() takes
1229
+ * extra care for the error page's state (as done in __get_hwpoison_page()),
1230
+ * and has some retry logic in get_any_page().
1231
+ *
1232
+ * Return: 0 on failure,
1233
+ * 1 on success for in-use pages in a well-defined state,
1234
+ * -EIO for pages on which we can not handle memory errors,
1235
+ * -EBUSY when get_hwpoison_page() has raced with page lifecycle
1236
+ * operations like allocation and free.
1237
+ */
1238
+ static int get_hwpoison_page (struct page * p , unsigned long flags )
1225
1239
{
1226
1240
int ret ;
1227
1241
1228
1242
zone_pcp_disable (page_zone (p ));
1229
- if (ctxt == MF_SOFT_OFFLINE )
1230
- ret = get_any_page (p , flags );
1231
- else
1232
- ret = __get_hwpoison_page (p );
1243
+ ret = get_any_page (p , flags );
1233
1244
zone_pcp_enable (page_zone (p ));
1234
1245
1235
1246
return ret ;
@@ -1418,27 +1429,33 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
1418
1429
1419
1430
num_poisoned_pages_inc ();
1420
1431
1421
- if (!(flags & MF_COUNT_INCREASED ) && !get_hwpoison_page (p , flags , 0 )) {
1422
- /*
1423
- * Check "filter hit" and "race with other subpage."
1424
- */
1425
- lock_page (head );
1426
- if (PageHWPoison (head )) {
1427
- if ((hwpoison_filter (p ) && TestClearPageHWPoison (p ))
1428
- || (p != head && TestSetPageHWPoison (head ))) {
1429
- num_poisoned_pages_dec ();
1430
- unlock_page (head );
1431
- return 0 ;
1432
+ if (!(flags & MF_COUNT_INCREASED )) {
1433
+ res = get_hwpoison_page (p , flags );
1434
+ if (!res ) {
1435
+ /*
1436
+ * Check "filter hit" and "race with other subpage."
1437
+ */
1438
+ lock_page (head );
1439
+ if (PageHWPoison (head )) {
1440
+ if ((hwpoison_filter (p ) && TestClearPageHWPoison (p ))
1441
+ || (p != head && TestSetPageHWPoison (head ))) {
1442
+ num_poisoned_pages_dec ();
1443
+ unlock_page (head );
1444
+ return 0 ;
1445
+ }
1432
1446
}
1447
+ unlock_page (head );
1448
+ res = MF_FAILED ;
1449
+ if (!dissolve_free_huge_page (p ) && take_page_off_buddy (p )) {
1450
+ page_ref_inc (p );
1451
+ res = MF_RECOVERED ;
1452
+ }
1453
+ action_result (pfn , MF_MSG_FREE_HUGE , res );
1454
+ return res == MF_RECOVERED ? 0 : - EBUSY ;
1455
+ } else if (res < 0 ) {
1456
+ action_result (pfn , MF_MSG_UNKNOWN , MF_IGNORED );
1457
+ return - EBUSY ;
1433
1458
}
1434
- unlock_page (head );
1435
- res = MF_FAILED ;
1436
- if (!dissolve_free_huge_page (p ) && take_page_off_buddy (p )) {
1437
- page_ref_inc (p );
1438
- res = MF_RECOVERED ;
1439
- }
1440
- action_result (pfn , MF_MSG_FREE_HUGE , res );
1441
- return res == MF_RECOVERED ? 0 : - EBUSY ;
1442
1459
}
1443
1460
1444
1461
lock_page (head );
@@ -1641,28 +1658,35 @@ int memory_failure(unsigned long pfn, int flags)
1641
1658
* In fact it's dangerous to directly bump up page count from 0,
1642
1659
* that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
1643
1660
*/
1644
- if (!(flags & MF_COUNT_INCREASED ) && !get_hwpoison_page (p , flags , 0 )) {
1645
- if (is_free_buddy_page (p )) {
1646
- if (take_page_off_buddy (p )) {
1647
- page_ref_inc (p );
1648
- res = MF_RECOVERED ;
1649
- } else {
1650
- /* We lost the race, try again */
1651
- if (retry ) {
1652
- ClearPageHWPoison (p );
1653
- num_poisoned_pages_dec ();
1654
- retry = false;
1655
- goto try_again ;
1661
+ if (!(flags & MF_COUNT_INCREASED )) {
1662
+ res = get_hwpoison_page (p , flags );
1663
+ if (!res ) {
1664
+ if (is_free_buddy_page (p )) {
1665
+ if (take_page_off_buddy (p )) {
1666
+ page_ref_inc (p );
1667
+ res = MF_RECOVERED ;
1668
+ } else {
1669
+ /* We lost the race, try again */
1670
+ if (retry ) {
1671
+ ClearPageHWPoison (p );
1672
+ num_poisoned_pages_dec ();
1673
+ retry = false;
1674
+ goto try_again ;
1675
+ }
1676
+ res = MF_FAILED ;
1656
1677
}
1657
- res = MF_FAILED ;
1678
+ action_result (pfn , MF_MSG_BUDDY , res );
1679
+ res = res == MF_RECOVERED ? 0 : - EBUSY ;
1680
+ } else {
1681
+ action_result (pfn , MF_MSG_KERNEL_HIGH_ORDER , MF_IGNORED );
1682
+ res = - EBUSY ;
1658
1683
}
1659
- action_result (pfn , MF_MSG_BUDDY , res );
1660
- res = res == MF_RECOVERED ? 0 : - EBUSY ;
1661
- } else {
1662
- action_result (pfn , MF_MSG_KERNEL_HIGH_ORDER , MF_IGNORED );
1684
+ goto unlock_mutex ;
1685
+ } else if (res < 0 ) {
1686
+ action_result (pfn , MF_MSG_UNKNOWN , MF_IGNORED );
1663
1687
res = - EBUSY ;
1688
+ goto unlock_mutex ;
1664
1689
}
1665
- goto unlock_mutex ;
1666
1690
}
1667
1691
1668
1692
if (PageTransHuge (hpage )) {
@@ -1940,7 +1964,7 @@ int unpoison_memory(unsigned long pfn)
1940
1964
return 0 ;
1941
1965
}
1942
1966
1943
- if (!get_hwpoison_page (p , flags , 0 )) {
1967
+ if (!get_hwpoison_page (p , flags )) {
1944
1968
if (TestClearPageHWPoison (p ))
1945
1969
num_poisoned_pages_dec ();
1946
1970
unpoison_pr_info ("Unpoison: Software-unpoisoned free page %#lx\n" ,
@@ -2156,7 +2180,7 @@ int soft_offline_page(unsigned long pfn, int flags)
2156
2180
2157
2181
retry :
2158
2182
get_online_mems ();
2159
- ret = get_hwpoison_page (page , flags , MF_SOFT_OFFLINE );
2183
+ ret = get_hwpoison_page (page , flags );
2160
2184
put_online_mems ();
2161
2185
2162
2186
if (ret > 0 ) {
0 commit comments