@@ -301,85 +301,65 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
301
301
}
302
302
}
303
303
304
/*
 * Review annotation (not part of the patch itself).
 *
 * This hunk deletes the helper mce_severity_amd_smca() together with the
 * old body of mce_severity_amd() (lines marked '-') and adds a replacement
 * body (lines marked '+'). Unmarked lines are shared context.
 *
 * Decision order in the added version; the first match wins:
 *   1. MCI_STATUS_PCC set                    -> MCE_PANIC_SEVERITY
 *   2. MCI_STATUS_DEFERRED set               -> MCE_DEFERRED_SEVERITY
 *   3. MCI_STATUS_UC clear                   -> MCE_KEEP_SEVERITY
 *   4. MCI_STATUS_OVER set while
 *      mce_flags.overflow_recov is clear     -> MCE_PANIC_SEVERITY
 *   5. mce_flags.succor clear                -> MCE_PANIC_SEVERITY
 *   6. error_context(m, regs) == IN_KERNEL   -> MCE_PANIC_SEVERITY
 *   otherwise                                -> MCE_AR_SEVERITY, the value
 *                                               ret is initialised with
 *
 * Every panic outcome also picks a message string. All paths leave through
 * the out label, where the message is stored through msg only if msg is
 * non-NULL and a message was picked.
 *
 * Differences visible in this hunk: the added lines never return
 * MCE_UC_SEVERITY, never read is_excp, and have no counterpart to the
 * removed MSR_AMD64_SMCA_MCx_CONFIG read or the MCI_STATUS_TCC test.
 */
- static __always_inline int mce_severity_amd_smca (struct mce * m , enum context err_ctx )
304
+ /* See AMD PPR(s) section Machine Check Error Handling. */
305
+ static noinstr int mce_severity_amd (struct mce * m , struct pt_regs * regs , char * * msg , bool is_excp )
305
306
{
306
- u64 mcx_cfg ;
307
+ char * panic_msg = NULL ;
308
+ int ret ;
307
309
308
310
/*
309
- * We need to look at the following bits:
310
- * - "succor" bit (data poisoning support), and
311
- * - TCC bit (Task Context Corrupt)
312
- * in MCi_STATUS to determine error severity.
311
+ * Default return value: Action required, the error must be handled
312
+ * immediately.
313
313
*/
314
- if (!mce_flags .succor )
315
- return MCE_PANIC_SEVERITY ;
316
-
317
- mcx_cfg = mce_rdmsrl (MSR_AMD64_SMCA_MCx_CONFIG (m -> bank ));
318
-
319
- /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
320
- if ((mcx_cfg & MCI_CONFIG_MCAX ) &&
321
- (m -> status & MCI_STATUS_TCC ) &&
322
- (err_ctx == IN_KERNEL ))
323
- return MCE_PANIC_SEVERITY ;
324
-
325
- /* ...otherwise invoke hwpoison handler. */
326
- return MCE_AR_SEVERITY ;
327
- }
328
-
329
- /*
330
- * See AMD Error Scope Hierarchy table in a newer BKDG. For example
331
- * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
332
- */
333
- static noinstr int mce_severity_amd (struct mce * m , struct pt_regs * regs , char * * msg , bool is_excp )
334
- {
335
- enum context ctx = error_context (m , regs );
314
+ ret = MCE_AR_SEVERITY ;
336
315
337
316
/* Processor Context Corrupt, no need to fumble too much, die! */
338
- if (m -> status & MCI_STATUS_PCC )
339
- return MCE_PANIC_SEVERITY ;
340
-
341
- if (m -> status & MCI_STATUS_UC ) {
342
-
343
- if (ctx == IN_KERNEL )
344
- return MCE_PANIC_SEVERITY ;
317
+ if (m -> status & MCI_STATUS_PCC ) {
318
+ panic_msg = "Processor Context Corrupt" ;
319
+ ret = MCE_PANIC_SEVERITY ;
320
+ goto out ;
321
+ }
345
322
346
- /*
347
- * On older systems where overflow_recov flag is not present, we
348
- * should simply panic if an error overflow occurs. If
349
- * overflow_recov flag is present and set, then software can try
350
- * to at least kill process to prolong system operation.
351
- */
352
- if (mce_flags .overflow_recov ) {
353
- if (mce_flags .smca )
354
- return mce_severity_amd_smca (m , ctx );
355
-
356
- /* kill current process */
357
- return MCE_AR_SEVERITY ;
358
- } else {
359
- /* at least one error was not logged */
360
- if (m -> status & MCI_STATUS_OVER )
361
- return MCE_PANIC_SEVERITY ;
362
- }
363
-
364
- /*
365
- * For any other case, return MCE_UC_SEVERITY so that we log the
366
- * error and exit #MC handler.
367
- */
368
- return MCE_UC_SEVERITY ;
323
/*
 * Review annotation: in the added version the DEFERRED test runs before
 * the UC test further down, so a status word with DEFERRED set yields
 * MCE_DEFERRED_SEVERITY whatever the UC bit says. The removed version
 * tested UC first and only reached its DEFERRED test when UC was clear.
 */
+ if (m -> status & MCI_STATUS_DEFERRED ) {
324
+ ret = MCE_DEFERRED_SEVERITY ;
325
+ goto out ;
369
326
}
370
327
371
328
/*
372
- * deferred error: poll handler catches these and adds to mce_ring so
373
- * memory-failure can take recovery actions .
329
+ * If the UC bit is not set, the system either corrected or deferred
330
+ * the error. No action will be required after logging the error .
374
331
*/
375
- if (m -> status & MCI_STATUS_DEFERRED )
376
- return MCE_DEFERRED_SEVERITY ;
332
+ if (!(m -> status & MCI_STATUS_UC )) {
333
+ ret = MCE_KEEP_SEVERITY ;
334
+ goto out ;
335
+ }
377
336
378
337
/*
379
- * corrected error: poll handler catches these and passes responsibility
380
- * of decoding the error to EDAC
338
+ * On MCA overflow, without the MCA overflow recovery feature the
339
+ * system will not be able to recover, panic.
381
340
*/
382
- return MCE_KEEP_SEVERITY ;
341
+ if ((m -> status & MCI_STATUS_OVER ) && !mce_flags .overflow_recov ) {
342
+ panic_msg = "Overflowed uncorrected error without MCA Overflow Recovery" ;
343
+ ret = MCE_PANIC_SEVERITY ;
344
+ goto out ;
345
+ }
346
+
347
/*
 * Review annotation: this point is reached only with UC set, because the
 * UC-clear case already jumped to out. A clear mce_flags.succor therefore
 * turns any remaining uncorrected error into a panic.
 */
+ if (!mce_flags .succor ) {
348
+ panic_msg = "Uncorrected error without MCA Recovery" ;
349
+ ret = MCE_PANIC_SEVERITY ;
350
+ goto out ;
351
+ }
352
+
353
/*
 * Review annotation: last test, so no goto is needed and control falls
 * through to out. When the context is not IN_KERNEL, ret still holds its
 * initial MCE_AR_SEVERITY.
 */
+ if (error_context (m , regs ) == IN_KERNEL ) {
354
+ panic_msg = "Uncorrected unrecoverable error in kernel context" ;
355
+ ret = MCE_PANIC_SEVERITY ;
356
+ }
357
+
358
/*
 * Review annotation: single exit point. The store through msg is skipped
 * when msg is NULL or when no panic message was chosen, so the caller's
 * string is left untouched on every non-panic outcome.
 */
+ out :
359
+ if (msg && panic_msg )
360
+ * msg = panic_msg ;
361
+
362
+ return ret ;
383
363
}
384
364
385
365
static noinstr int mce_severity_intel (struct mce * m , struct pt_regs * regs , char * * msg , bool is_excp )
0 commit comments