42
42
#include <linux/export.h>
43
43
#include <linux/jump_label.h>
44
44
#include <linux/set_memory.h>
45
+ #include <linux/task_work.h>
45
46
46
47
#include <asm/intel-family.h>
47
48
#include <asm/processor.h>
@@ -1086,23 +1087,6 @@ static void mce_clear_state(unsigned long *toclear)
1086
1087
}
1087
1088
}
1088
1089
1089
/*
 * Report an uncorrected memory error hit during user access and pass the
 * affected page frame (m->addr >> PAGE_SHIFT) to memory_failure().
 *
 * Returns the value returned by memory_failure(); a non-zero value is
 * logged as "not recovered", while zero leads to set_mce_nospec() being
 * called for the same page frame.
 */
- static int do_memory_failure (struct mce * m )
1090
- {
1091
- int flags = MF_ACTION_REQUIRED ;
1092
- int ret ;
1093
-
1094
- pr_err ("Uncorrected hardware memory error in user-access at %llx" , m -> addr );
1095
/* MCG_STATUS_RIPV clear in the saved status: also request MF_MUST_KILL. */
- if (!(m -> mcgstatus & MCG_STATUS_RIPV ))
1096
- flags |= MF_MUST_KILL ;
1097
- ret = memory_failure (m -> addr >> PAGE_SHIFT , flags );
1098
- if (ret )
1099
- pr_err ("Memory error not recovered" );
1100
- else
1101
- set_mce_nospec (m -> addr >> PAGE_SHIFT );
1102
- return ret ;
1103
- }
1104
-
1105
-
1106
1090
/*
1107
1091
* Cases where we avoid rendezvous handler timeout:
1108
1092
* 1) If this CPU is offline.
@@ -1204,6 +1188,29 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
1204
1188
* m = * final ;
1205
1189
}
1206
1190
1191
/*
 * Callback that unconditionally raises SIGBUS via force_sig().
 * The callback_head argument is not used; it is only present so the
 * function has the signature expected of a callback_head ->func handler.
 */
+ static void kill_me_now (struct callback_head * ch )
1192
+ {
1193
+ force_sig (SIGBUS );
1194
+ }
1195
+
1196
/*
 * Callback that attempts memory-failure recovery for the error recorded in
 * the owning task, and falls back to SIGBUS if recovery fails.
 *
 * @cb: the mce_kill_me member embedded in a task_struct; the task is
 *      recovered from it with container_of().
 *
 * Reads p->mce_addr and p->mce_status from that task. When
 * memory_failure() returns zero, the page frame is passed to
 * set_mce_nospec() and no signal is sent; otherwise the failure is logged
 * and kill_me_now() raises SIGBUS.
 */
+ static void kill_me_maybe (struct callback_head * cb )
1197
+ {
1198
+ struct task_struct * p = container_of (cb , struct task_struct , mce_kill_me );
1199
+ int flags = MF_ACTION_REQUIRED ;
1200
+
1201
+ pr_err ("Uncorrected hardware memory error in user-access at %llx" , p -> mce_addr );
1202
/* MCG_STATUS_RIPV clear in the saved status: also request MF_MUST_KILL. */
+ if (!(p -> mce_status & MCG_STATUS_RIPV ))
1203
+ flags |= MF_MUST_KILL ;
1204
+
1205
/* Zero return from memory_failure() is treated as the recovered case. */
+ if (!memory_failure (p -> mce_addr >> PAGE_SHIFT , flags )) {
1206
+ set_mce_nospec (p -> mce_addr >> PAGE_SHIFT );
1207
+ return ;
1208
+ }
1209
+
1210
+ pr_err ("Memory error not recovered" );
1211
+ kill_me_now (cb );
1212
+ }
1213
+
1207
1214
/*
1208
1215
* The actual machine check handler. This only handles real
1209
1216
* exceptions when something got corrupted coming in through int 18.
@@ -1222,7 +1229,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
1222
1229
* backing the user stack, tracing that reads the user stack will cause
1223
1230
* potentially infinite recursion.
1224
1231
*/
1225
- void notrace do_machine_check (struct pt_regs * regs , long error_code )
1232
+ void noinstr do_machine_check (struct pt_regs * regs , long error_code )
1226
1233
{
1227
1234
DECLARE_BITMAP (valid_banks , MAX_NR_BANKS );
1228
1235
DECLARE_BITMAP (toclear , MAX_NR_BANKS );
@@ -1354,13 +1361,13 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
1354
1361
if ((m .cs & 3 ) == 3 ) {
1355
1362
/* If this triggers there is no way to recover. Die hard. */
1356
1363
BUG_ON (!on_thread_stack () || !user_mode (regs ));
1357
- local_irq_enable ();
1358
- preempt_enable ();
1359
1364
1360
- if (kill_it || do_memory_failure (& m ))
1361
- force_sig (SIGBUS );
1362
- preempt_disable ();
1363
- local_irq_disable ();
1365
+ current -> mce_addr = m .addr ;
1366
+ current -> mce_status = m .mcgstatus ;
1367
+ current -> mce_kill_me .func = kill_me_maybe ;
1368
+ if (kill_it )
1369
+ current -> mce_kill_me .func = kill_me_now ;
1370
+ task_work_add (current , & current -> mce_kill_me , true);
1364
1371
} else {
1365
1372
if (!fixup_exception (regs , X86_TRAP_MC , error_code , 0 ))
1366
1373
mce_panic ("Failed kernel mode recovery" , & m , msg );
@@ -1370,7 +1377,6 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
1370
1377
ist_exit (regs );
1371
1378
}
1372
1379
EXPORT_SYMBOL_GPL (do_machine_check );
1373
- NOKPROBE_SYMBOL (do_machine_check );
1374
1380
1375
1381
#ifndef CONFIG_MEMORY_FAILURE
1376
1382
int memory_failure (unsigned long pfn , int flags )
0 commit comments