
Commit 751dc83

genirq: Introduce common irq_force_complete_move() implementation
CONFIG_GENERIC_PENDING_IRQ requires an architecture specific implementation of irq_force_complete_move() for CPU hotplug. At the moment, only x86 implements this unconditionally, but for RISC-V irq_force_complete_move() is only needed when the RISC-V IMSIC driver is in use and not needed otherwise.

To allow runtime configuration of this mechanism, introduce a common irq_force_complete_move() implementation in the interrupt core code, which only invokes the completion function when an interrupt chip in the hierarchy implements it.

Switch x86 over to the new mechanism.

No functional change intended.

Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Anup Patel <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/all/[email protected]
1 parent fe35ece commit 751dc83
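
To make the new contract concrete before the diff: any interrupt chip in an irq domain hierarchy can opt in by filling in the new irq_chip callback, and the core invokes it during CPU hotplug fixup. A minimal, hypothetical sketch of a driver-side chip opting in (the chip name and callback body are illustrative only, not part of this commit):

#include <linux/irq.h>

/* Hypothetical callback: release whatever per-CPU resources a
 * still-pending interrupt move holds on the outgoing CPU. */
static void demo_force_complete_move(struct irq_data *d)
{
        /* chip-specific cleanup of the pending move goes here */
}

static struct irq_chip demo_chip = {
        .name                           = "DEMO",
        .irq_force_complete_move        = demo_force_complete_move,
        /* other callbacks as usual */
};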

4 files changed: +123 -125 lines changed

arch/x86/kernel/apic/vector.c

Lines changed: 108 additions & 123 deletions
@@ -888,8 +888,109 @@ static int apic_set_affinity(struct irq_data *irqd,
 	return err ? err : IRQ_SET_MASK_OK;
 }
 
+static void free_moved_vector(struct apic_chip_data *apicd)
+{
+	unsigned int vector = apicd->prev_vector;
+	unsigned int cpu = apicd->prev_cpu;
+	bool managed = apicd->is_managed;
+
+	/*
+	 * Managed interrupts are usually not migrated away
+	 * from an online CPU, but CPU isolation 'managed_irq'
+	 * can make that happen.
+	 * 1) Activation does not take the isolation into account
+	 *    to keep the code simple
+	 * 2) Migration away from an isolated CPU can happen when
+	 *    a non-isolated CPU which is in the calculated
+	 *    affinity mask comes online.
+	 */
+	trace_vector_free_moved(apicd->irq, cpu, vector, managed);
+	irq_matrix_free(vector_matrix, cpu, vector, managed);
+	per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
+	hlist_del_init(&apicd->clist);
+	apicd->prev_vector = 0;
+	apicd->move_in_progress = 0;
+}
+
+/*
+ * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
+ */
+static void apic_force_complete_move(struct irq_data *irqd)
+{
+	unsigned int cpu = smp_processor_id();
+	struct apic_chip_data *apicd;
+	unsigned int vector;
+
+	guard(raw_spinlock)(&vector_lock);
+	apicd = apic_chip_data(irqd);
+	if (!apicd)
+		return;
+
+	/*
+	 * If prev_vector is empty or the descriptor is neither currently
+	 * nor previously on the outgoing CPU no action required.
+	 */
+	vector = apicd->prev_vector;
+	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+		return;
+
+	/*
+	 * This is tricky. If the cleanup of the old vector has not been
+	 * done yet, then the following setaffinity call will fail with
+	 * -EBUSY. This can leave the interrupt in a stale state.
+	 *
+	 * All CPUs are stuck in stop machine with interrupts disabled so
+	 * calling __irq_complete_move() would be completely pointless.
+	 *
+	 * 1) The interrupt is in move_in_progress state. That means that we
+	 *    have not seen an interrupt since the io_apic was reprogrammed to
+	 *    the new vector.
+	 *
+	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
+	 *    have not been processed yet.
+	 */
+	if (apicd->move_in_progress) {
+		/*
+		 * In theory there is a race:
+		 *
+		 * set_ioapic(new_vector) <-- Interrupt is raised before update
+		 *                            is effective, i.e. it's raised on
+		 *                            the old vector.
+		 *
+		 * So if the target cpu cannot handle that interrupt before
+		 * the old vector is cleaned up, we get a spurious interrupt
+		 * and in the worst case the ioapic irq line becomes stale.
+		 *
+		 * But in case of cpu hotplug this should be a non issue
+		 * because if the affinity update happens right before all
+		 * cpus rendezvous in stop machine, there is no way that the
+		 * interrupt can be blocked on the target cpu because all cpus
+		 * loops first with interrupts enabled in stop machine, so the
+		 * old vector is not yet cleaned up when the interrupt fires.
+		 *
+		 * So the only way to run into this issue is if the delivery
+		 * of the interrupt on the apic/system bus would be delayed
+		 * beyond the point where the target cpu disables interrupts
+		 * in stop machine. I doubt that it can happen, but at least
+		 * there is a theoretical chance. Virtualization might be
+		 * able to expose this, but AFAICT the IOAPIC emulation is not
+		 * as stupid as the real hardware.
+		 *
+		 * Anyway, there is nothing we can do about that at this point
+		 * w/o refactoring the whole fixup_irq() business completely.
+		 * We print at least the irq number and the old vector number,
+		 * so we have the necessary information when a problem in that
+		 * area arises.
+		 */
+		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
+			irqd->irq, vector);
+	}
+	free_moved_vector(apicd);
+}
+
 #else
-# define apic_set_affinity	NULL
+# define apic_set_affinity		NULL
+# define apic_force_complete_move	NULL
 #endif
 
 static int apic_retrigger_irq(struct irq_data *irqd)
@@ -923,39 +1024,16 @@ static void x86_vector_msi_compose_msg(struct irq_data *data,
 }
 
 static struct irq_chip lapic_controller = {
-	.name			= "APIC",
-	.irq_ack		= apic_ack_edge,
-	.irq_set_affinity	= apic_set_affinity,
-	.irq_compose_msi_msg	= x86_vector_msi_compose_msg,
-	.irq_retrigger		= apic_retrigger_irq,
+	.name				= "APIC",
+	.irq_ack			= apic_ack_edge,
+	.irq_set_affinity		= apic_set_affinity,
+	.irq_compose_msi_msg		= x86_vector_msi_compose_msg,
+	.irq_force_complete_move	= apic_force_complete_move,
+	.irq_retrigger			= apic_retrigger_irq,
 };
 
 #ifdef CONFIG_SMP
 
-static void free_moved_vector(struct apic_chip_data *apicd)
-{
-	unsigned int vector = apicd->prev_vector;
-	unsigned int cpu = apicd->prev_cpu;
-	bool managed = apicd->is_managed;
-
-	/*
-	 * Managed interrupts are usually not migrated away
-	 * from an online CPU, but CPU isolation 'managed_irq'
-	 * can make that happen.
-	 * 1) Activation does not take the isolation into account
-	 *    to keep the code simple
-	 * 2) Migration away from an isolated CPU can happen when
-	 *    a non-isolated CPU which is in the calculated
-	 *    affinity mask comes online.
-	 */
-	trace_vector_free_moved(apicd->irq, cpu, vector, managed);
-	irq_matrix_free(vector_matrix, cpu, vector, managed);
-	per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
-	hlist_del_init(&apicd->clist);
-	apicd->prev_vector = 0;
-	apicd->move_in_progress = 0;
-}
-
 static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
 {
 	struct apic_chip_data *apicd;
@@ -1068,99 +1146,6 @@ void irq_complete_move(struct irq_cfg *cfg)
 	__vector_schedule_cleanup(apicd);
 }
 
-/*
- * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
- */
-void irq_force_complete_move(struct irq_desc *desc)
-{
-	unsigned int cpu = smp_processor_id();
-	struct apic_chip_data *apicd;
-	struct irq_data *irqd;
-	unsigned int vector;
-
-	/*
-	 * The function is called for all descriptors regardless of which
-	 * irqdomain they belong to. For example if an IRQ is provided by
-	 * an irq_chip as part of a GPIO driver, the chip data for that
-	 * descriptor is specific to the irq_chip in question.
-	 *
-	 * Check first that the chip_data is what we expect
-	 * (apic_chip_data) before touching it any further.
-	 */
-	irqd = irq_domain_get_irq_data(x86_vector_domain,
-				       irq_desc_get_irq(desc));
-	if (!irqd)
-		return;
-
-	raw_spin_lock(&vector_lock);
-	apicd = apic_chip_data(irqd);
-	if (!apicd)
-		goto unlock;
-
-	/*
-	 * If prev_vector is empty or the descriptor is neither currently
-	 * nor previously on the outgoing CPU no action required.
-	 */
-	vector = apicd->prev_vector;
-	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
-		goto unlock;
-
-	/*
-	 * This is tricky. If the cleanup of the old vector has not been
-	 * done yet, then the following setaffinity call will fail with
-	 * -EBUSY. This can leave the interrupt in a stale state.
-	 *
-	 * All CPUs are stuck in stop machine with interrupts disabled so
-	 * calling __irq_complete_move() would be completely pointless.
-	 *
-	 * 1) The interrupt is in move_in_progress state. That means that we
-	 *    have not seen an interrupt since the io_apic was reprogrammed to
-	 *    the new vector.
-	 *
-	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
-	 *    have not been processed yet.
-	 */
-	if (apicd->move_in_progress) {
-		/*
-		 * In theory there is a race:
-		 *
-		 * set_ioapic(new_vector) <-- Interrupt is raised before update
-		 *                            is effective, i.e. it's raised on
-		 *                            the old vector.
-		 *
-		 * So if the target cpu cannot handle that interrupt before
-		 * the old vector is cleaned up, we get a spurious interrupt
-		 * and in the worst case the ioapic irq line becomes stale.
-		 *
-		 * But in case of cpu hotplug this should be a non issue
-		 * because if the affinity update happens right before all
-		 * cpus rendezvous in stop machine, there is no way that the
-		 * interrupt can be blocked on the target cpu because all cpus
-		 * loops first with interrupts enabled in stop machine, so the
-		 * old vector is not yet cleaned up when the interrupt fires.
-		 *
-		 * So the only way to run into this issue is if the delivery
-		 * of the interrupt on the apic/system bus would be delayed
-		 * beyond the point where the target cpu disables interrupts
-		 * in stop machine. I doubt that it can happen, but at least
-		 * there is a theoretical chance. Virtualization might be
-		 * able to expose this, but AFAICT the IOAPIC emulation is not
-		 * as stupid as the real hardware.
-		 *
-		 * Anyway, there is nothing we can do about that at this point
-		 * w/o refactoring the whole fixup_irq() business completely.
-		 * We print at least the irq number and the old vector number,
-		 * so we have the necessary information when a problem in that
-		 * area arises.
-		 */
-		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
-			irqd->irq, vector);
-	}
-	free_moved_vector(apicd);
-unlock:
-	raw_spin_unlock(&vector_lock);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * Note, this is not accurate accounting, but at least good enough to
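
A side note on the x86 conversion above: the relocated helper drops the raw_spin_lock()/goto unlock pairing of the removed irq_force_complete_move() in favour of the scope-based guard() helper from the kernel's cleanup infrastructure (<linux/cleanup.h>), so the early returns release vector_lock automatically. A minimal sketch of that pattern, with an illustrative lock and condition (not from this commit):

#include <linux/cleanup.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);

static void demo(bool cond)
{
        guard(raw_spinlock)(&demo_lock);        /* released on scope exit */

        if (!cond)
                return;         /* early return, no goto/unlock label needed */

        /* ... work under demo_lock ... */
}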

include/linux/irq.h

Lines changed: 3 additions & 2 deletions
@@ -486,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
  * @ipi_send_mask:	send an IPI to destination cpus in cpumask
  * @irq_nmi_setup:	function called from core code before enabling an NMI
  * @irq_nmi_teardown:	function called from core code after disabling an NMI
+ * @irq_force_complete_move:	optional function to force complete pending irq move
  * @flags:		chip specific flags
  */
 struct irq_chip {
@@ -537,6 +538,8 @@ struct irq_chip {
 	int		(*irq_nmi_setup)(struct irq_data *data);
 	void		(*irq_nmi_teardown)(struct irq_data *data);
 
+	void		(*irq_force_complete_move)(struct irq_data *data);
+
 	unsigned long	flags;
 };
 
@@ -619,11 +622,9 @@ static inline void irq_move_irq(struct irq_data *data)
 	__irq_move_irq(data);
 }
 void irq_move_masked_irq(struct irq_data *data);
-void irq_force_complete_move(struct irq_desc *desc);
 #else
 static inline void irq_move_irq(struct irq_data *data) { }
 static inline void irq_move_masked_irq(struct irq_data *data) { }
-static inline void irq_force_complete_move(struct irq_desc *desc) { }
 #endif
 
 extern int no_irq_affinity;

kernel/irq/internals.h

Lines changed: 2 additions & 0 deletions
@@ -442,6 +442,7 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
 	return desc->pending_mask;
 }
 bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
+void irq_force_complete_move(struct irq_desc *desc);
 #else /* CONFIG_GENERIC_PENDING_IRQ */
 static inline bool irq_can_move_pcntxt(struct irq_data *data)
 {
@@ -467,6 +468,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
 {
 	return false;
 }
+static inline void irq_force_complete_move(struct irq_desc *desc) { }
 #endif /* !CONFIG_GENERIC_PENDING_IRQ */
 
 #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)

kernel/irq/migration.c

Lines changed: 10 additions & 0 deletions
@@ -35,6 +35,16 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
 	return true;
 }
 
+void irq_force_complete_move(struct irq_desc *desc)
+{
+	for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = d->parent_data) {
+		if (d->chip && d->chip->irq_force_complete_move) {
+			d->chip->irq_force_complete_move(d);
+			return;
+		}
+	}
+}
+
 void irq_move_masked_irq(struct irq_data *idata)
 {
 	struct irq_desc *desc = irq_data_to_desc(idata);
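
One point the core implementation above makes implicitly: the walk starts at the descriptor's outermost irq_data and follows parent_data towards the root, so only the first chip in the hierarchy that provides irq_force_complete_move() is invoked, and descriptors whose hierarchy provides none are skipped without side effects. A hypothetical two-level stack, sketched for illustration (chip names and the callback body are not from this commit):

/* Hypothetical leaf chip (e.g. an MSI chip) without the callback:
 * the core walk passes over it and dispatches to the parent chip. */
static struct irq_chip demo_msi_chip = {
        .name   = "DEMO-MSI",
        /* no .irq_force_complete_move */
};

static void demo_vector_force_complete_move(struct irq_data *d)
{
        /* vector-level cleanup of the interrupted move */
}

/* Hypothetical parent chip providing the callback. */
static struct irq_chip demo_vector_chip = {
        .name                           = "DEMO-VECTOR",
        .irq_force_complete_move        = demo_vector_force_complete_move,
};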
