@@ -272,8 +272,9 @@ struct microcode_ctrl {
 
 DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
 static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
+static atomic_t late_cpus_in, offline_in_nmi;
 static unsigned int loops_per_usec;
-static atomic_t late_cpus_in;
+static cpumask_t cpu_offline_mask;
 
 static noinstr bool wait_for_cpus(atomic_t *cnt)
 {
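For context: wait_for_cpus() (unchanged by this hunk) implements a counting rendezvous on late_cpus_in, and the new offline_in_nmi counter reuses the same idea for soft-offlined siblings. Below is a minimal user-space sketch of that pattern using C11 atomics in place of the kernel's atomic_t helpers; the function name and timeout handling are illustrative, not from the patch:

#include <stdatomic.h>
#include <stdbool.h>

/* Illustrative stand-in for the kernel's late_cpus_in counter. */
static atomic_int cpus_in;

/* Each participant calls this; all proceed once everyone has arrived. */
static bool rendezvous(int timeout_loops)
{
        atomic_fetch_sub(&cpus_in, 1);          /* announce arrival */

        for (int i = 0; i < timeout_loops; i++) {
                if (atomic_load(&cpus_in) <= 0)
                        return true;            /* everyone arrived */
                /* the kernel busy-waits here, roughly udelay(1) per loop */
        }
        return false;                           /* somebody never showed up */
}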
@@ -381,7 +382,7 @@ static noinstr void load_secondary(unsigned int cpu)
        instrumentation_end();
 }
 
-static void load_primary(unsigned int cpu)
+static void __load_primary(unsigned int cpu)
 {
        struct cpumask *secondaries = topology_sibling_cpumask(cpu);
        enum sibling_ctrl ctrl;
@@ -416,6 +417,67 @@ static void load_primary(unsigned int cpu)
        }
 }
 
+static bool kick_offline_cpus(unsigned int nr_offl)
+{
+        unsigned int cpu, timeout;
+
+        for_each_cpu(cpu, &cpu_offline_mask) {
+                /* Enable the rendezvous handler and send NMI */
+                per_cpu(ucode_ctrl.nmi_enabled, cpu) = true;
+                apic_send_nmi_to_offline_cpu(cpu);
+        }
+
+        /* Wait for them to arrive */
+        for (timeout = 0; timeout < (USEC_PER_SEC / 2); timeout++) {
+                if (atomic_read(&offline_in_nmi) == nr_offl)
+                        return true;
+                udelay(1);
+        }
+        /* Let the others time out */
+        return false;
+}
+
+static void release_offline_cpus(void)
+{
+        unsigned int cpu;
+
+        for_each_cpu(cpu, &cpu_offline_mask)
+                per_cpu(ucode_ctrl.ctrl, cpu) = SCTRL_DONE;
+}
+
+static void load_primary(unsigned int cpu)
+{
+        unsigned int nr_offl = cpumask_weight(&cpu_offline_mask);
+        bool proceed = true;
+
+        /* Kick soft-offlined SMT siblings if required */
+        if (!cpu && nr_offl)
+                proceed = kick_offline_cpus(nr_offl);
+
+        /* If the soft-offlined CPUs did not respond, abort */
+        if (proceed)
+                __load_primary(cpu);
+
+        /* Unconditionally release soft-offlined SMT siblings if required */
+        if (!cpu && nr_offl)
+                release_offline_cpus();
+}
+
+/*
+ * Minimal stub rendezvous handler for soft-offlined CPUs which participate
+ * in the NMI rendezvous to protect against a concurrent NMI on affected
+ * CPUs.
+ */
+void noinstr microcode_offline_nmi_handler(void)
+{
+        if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
+                return;
+        raw_cpu_write(ucode_ctrl.nmi_enabled, false);
+        raw_cpu_write(ucode_ctrl.result, UCODE_OFFLINE);
+        raw_atomic_inc(&offline_in_nmi);
+        wait_for_ctrl();
+}
+
 static noinstr bool microcode_update_handler(void)
 {
        unsigned int cpu = raw_smp_processor_id();
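The control flow above splits across two contexts: CPU 0 runs kick_offline_cpus() and release_offline_cpus() around __load_primary(), while each parked sibling runs microcode_offline_nmi_handler() in NMI context. A user-space toy model of that handshake follows, assuming C11 atomics; the SCTRL_*/UCODE_OFFLINE names mirror the patch, but the struct layout and function names here are invented for illustration:

#include <stdatomic.h>
#include <stdbool.h>

enum { SCTRL_WAIT, SCTRL_APPLY, SCTRL_DONE };
enum { UCODE_OFFLINE = 1 };

struct ctrl {
        _Atomic int  ctrl;
        _Atomic int  result;
        _Atomic bool nmi_enabled;
};
static atomic_int offline_arrived;              /* models offline_in_nmi */

/* Offline side: what the stub NMI handler does. */
static void offline_stub(struct ctrl *c)
{
        if (!atomic_load(&c->nmi_enabled))
                return;                         /* unrelated NMI, ignore */
        atomic_store(&c->nmi_enabled, false);
        atomic_store(&c->result, UCODE_OFFLINE);
        atomic_fetch_add(&offline_arrived, 1);  /* CPU 0 counts this */
        while (atomic_load(&c->ctrl) != SCTRL_DONE)
                ;                               /* parked until released */
}

/* Control side: release one parked CPU after the update. */
static void release_one(struct ctrl *c)
{
        atomic_store(&c->ctrl, SCTRL_DONE);
}

Note the ordering: the sibling publishes its result before incrementing the arrival counter, so by the time CPU 0 sees the expected count, every sibling's result slot already reads UCODE_OFFLINE.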
@@ -472,13 +534,15 @@ static int load_cpus_stopped(void *unused)
 static int load_late_stop_cpus(void)
 {
        unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
+       unsigned int nr_offl, offline = 0;
        int old_rev = boot_cpu_data.microcode;
        struct cpuinfo_x86 prev_info;
 
        pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n");
        pr_err("You should switch to early loading, if possible.\n");
 
        atomic_set(&late_cpus_in, num_online_cpus());
+       atomic_set(&offline_in_nmi, 0);
        loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000);
 
        /*
@@ -501,6 +565,7 @@ static int load_late_stop_cpus(void)
                case UCODE_UPDATED:     updated++;      break;
                case UCODE_TIMEOUT:     timedout++;     break;
                case UCODE_OK:          siblings++;     break;
+               case UCODE_OFFLINE:     offline++;      break;
                default:                failed++;       break;
                }
        }
@@ -512,6 +577,13 @@ static int load_late_stop_cpus(void)
        /* Nothing changed. */
        if (!failed && !timedout)
                return 0;
+
+       nr_offl = cpumask_weight(&cpu_offline_mask);
+       if (offline < nr_offl) {
+               pr_warn("%u offline siblings did not respond.\n",
+                       nr_offl - atomic_read(&offline_in_nmi));
+               return -EIO;
+       }
        pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
               failed, timedout);
        return -EIO;
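A worked example of the new accounting: with three soft-offlined siblings of which only two reported UCODE_OFFLINE, a failed run is attributed to the unresponsive siblings rather than to the generic failed/timed-out path. A standalone sketch with made-up counts (names mirror the patch, values are illustrative):

#include <stdio.h>

int main(void)
{
        unsigned int nr_offl = 3;       /* siblings found soft-offlined in setup */
        unsigned int offline = 2;       /* siblings whose result was UCODE_OFFLINE */
        unsigned int failed = 1, timedout = 0;

        if (!failed && !timedout)
                return 0;               /* nothing went wrong */

        if (offline < nr_offl) {
                /* blame the siblings that never entered the NMI stub */
                printf("%u offline siblings did not respond.\n",
                       nr_offl - offline);
                return 1;               /* stands in for -EIO */
        }
        printf("update failed: %u CPUs failed %u CPUs timed out\n",
               failed, timedout);
        return 1;
}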
@@ -545,19 +617,49 @@ static int load_late_stop_cpus(void)
  *     modern CPUs uses MWAIT, which is also not guaranteed to be safe
  *     against a microcode update which affects MWAIT.
  *
- * 2) Initialize the per CPU control structure
+ *     As soft-offlined CPUs still react on NMIs, the SMT sibling
+ *     restriction can be lifted when the vendor driver signals to use NMI
+ *     for rendezvous and the APIC provides a mechanism to send an NMI to a
+ *     soft-offlined CPU. The soft-offlined CPUs are then able to
+ *     participate in the rendezvous in a trivial stub handler.
+ *
+ * 2) Initialize the per CPU control structure and create a cpumask
+ *    which contains "offline" secondary threads, so they can be handled
+ *    correctly by a control CPU.
  */
 static bool setup_cpus(void)
 {
        struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, };
+       bool allow_smt_offline;
        unsigned int cpu;
 
+       allow_smt_offline = microcode_ops->nmi_safe ||
+               (microcode_ops->use_nmi && apic->nmi_to_offline_cpu);
+
+       cpumask_clear(&cpu_offline_mask);
+
        for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
+               /*
+                * Offline CPUs sit in one of the play_dead() functions
+                * with interrupts disabled, but they still react on NMIs
+                * and execute arbitrary code. Also MWAIT being updated
+                * while the offline CPU sits there is not necessarily safe
+                * on all CPU variants.
+                *
+                * Mark them in the offline_cpus mask which will be handled
+                * by CPU0 later in the update process.
+                *
+                * Ensure that the primary thread is online so that it is
+                * guaranteed that all cores are updated.
+                */
                if (!cpu_online(cpu)) {
-                       if (topology_is_primary_thread(cpu) || !microcode_ops->nmi_safe) {
-                               pr_err("CPU %u not online\n", cpu);
+                       if (topology_is_primary_thread(cpu) || !allow_smt_offline) {
+                               pr_err("CPU %u not online, loading aborted\n", cpu);
                                return false;
                        }
+                       cpumask_set_cpu(cpu, &cpu_offline_mask);
+                       per_cpu(ucode_ctrl, cpu) = ctrl;
+                       continue;
                }
 
                /*
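The setup_cpus() changes partition the present-and-booted CPUs: online CPUs proceed as before, soft-offlined secondary threads are recorded in cpu_offline_mask for CPU 0 to kick later, and a soft-offlined primary thread (or a configuration without an NMI kick) aborts the load. A toy model of that partitioning, assuming plain bool arrays in place of cpumasks; all *_sim names are invented for illustration:

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

static bool cpu_online_sim[NR_CPUS];    /* fake cpu_online() input */
static bool cpu_primary_sim[NR_CPUS];   /* fake topology_is_primary_thread() */
static bool offline_mask[NR_CPUS];      /* models cpu_offline_mask */

static bool setup_cpus_sim(bool allow_smt_offline)
{
        for (unsigned int cpu = 0; cpu < NR_CPUS; cpu++) {
                if (!cpu_online_sim[cpu]) {
                        /* A parked primary thread, or no way to NMI-kick the
                         * sibling, means a whole core could miss the update:
                         * refuse to load. */
                        if (cpu_primary_sim[cpu] || !allow_smt_offline) {
                                printf("CPU %u not online, loading aborted\n", cpu);
                                return false;
                        }
                        offline_mask[cpu] = true;  /* CPU 0 will NMI-kick it */
                        continue;
                }
                /* online CPUs get their control structure initialized here */
        }
        return true;
}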