Skip to content

Commit c4f14ea

Browse files
committed
Merge tag 'irq-urgent-2021-08-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull irq fixes from Thomas Gleixner: "A set of fixes for PCI/MSI and x86 interrupt startup: - Mask all MSI-X entries when enabling MSI-X otherwise stale unmasked entries stay around e.g. when a crashkernel is booted. - Enforce masking of a MSI-X table entry when updating it, which mandatory according to speification - Ensure that writes to MSI[-X} tables are flushed. - Prevent invalid bits being set in the MSI mask register - Properly serialize modifications to the mask cache and the mask register for multi-MSI. - Cure the violation of the affinity setting rules on X86 during interrupt startup which can cause lost and stale interrupts. Move the initial affinity setting ahead of actualy enabling the interrupt. - Ensure that MSI interrupts are completely torn down before freeing them in the error handling case. - Prevent an array out of bounds access in the irq timings code" * tag 'irq-urgent-2021-08-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: driver core: Add missing kernel doc for device::msi_lock genirq/msi: Ensure deactivation on teardown genirq/timings: Prevent potential array overflow in __irq_timings_store() x86/msi: Force affinity setup before startup x86/ioapic: Force affinity setup before startup genirq: Provide IRQCHIP_AFFINITY_PRE_STARTUP PCI/MSI: Protect msi_desc::masked for multi-MSI PCI/MSI: Use msi_mask_irq() in pci_msi_shutdown() PCI/MSI: Correct misleading comments PCI/MSI: Do not set invalid bits in MSI mask PCI/MSI: Enforce MSI[X] entry updates to be visible PCI/MSI: Enforce that MSI-X table entry is masked for update PCI/MSI: Mask all unused MSI-X entries PCI/MSI: Enable and mask MSI-X early
2 parents 839da25 + 7a3dc4f commit c4f14ea

File tree

11 files changed

+113
-61
lines changed

11 files changed

+113
-61
lines changed

arch/x86/kernel/apic/io_apic.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1986,7 +1986,8 @@ static struct irq_chip ioapic_chip __read_mostly = {
19861986
.irq_set_affinity = ioapic_set_affinity,
19871987
.irq_retrigger = irq_chip_retrigger_hierarchy,
19881988
.irq_get_irqchip_state = ioapic_irq_get_chip_state,
1989-
.flags = IRQCHIP_SKIP_SET_WAKE,
1989+
.flags = IRQCHIP_SKIP_SET_WAKE |
1990+
IRQCHIP_AFFINITY_PRE_STARTUP,
19901991
};
19911992

19921993
static struct irq_chip ioapic_ir_chip __read_mostly = {
@@ -1999,7 +2000,8 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
19992000
.irq_set_affinity = ioapic_set_affinity,
20002001
.irq_retrigger = irq_chip_retrigger_hierarchy,
20012002
.irq_get_irqchip_state = ioapic_irq_get_chip_state,
2002-
.flags = IRQCHIP_SKIP_SET_WAKE,
2003+
.flags = IRQCHIP_SKIP_SET_WAKE |
2004+
IRQCHIP_AFFINITY_PRE_STARTUP,
20032005
};
20042006

20052007
static inline void init_IO_APIC_traps(void)

arch/x86/kernel/apic/msi.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,13 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
5858
* The quirk bit is not set in this case.
5959
* - The new vector is the same as the old vector
6060
* - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
61+
* - The interrupt is not yet started up
6162
* - The new destination CPU is the same as the old destination CPU
6263
*/
6364
if (!irqd_msi_nomask_quirk(irqd) ||
6465
cfg->vector == old_cfg.vector ||
6566
old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
67+
!irqd_is_started(irqd) ||
6668
cfg->dest_apicid == old_cfg.dest_apicid) {
6769
irq_msi_update_msg(irqd, cfg);
6870
return ret;
@@ -150,7 +152,8 @@ static struct irq_chip pci_msi_controller = {
150152
.irq_ack = irq_chip_ack_parent,
151153
.irq_retrigger = irq_chip_retrigger_hierarchy,
152154
.irq_set_affinity = msi_set_affinity,
153-
.flags = IRQCHIP_SKIP_SET_WAKE,
155+
.flags = IRQCHIP_SKIP_SET_WAKE |
156+
IRQCHIP_AFFINITY_PRE_STARTUP,
154157
};
155158

156159
int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
@@ -219,7 +222,8 @@ static struct irq_chip pci_msi_ir_controller = {
219222
.irq_mask = pci_msi_mask_irq,
220223
.irq_ack = irq_chip_ack_parent,
221224
.irq_retrigger = irq_chip_retrigger_hierarchy,
222-
.flags = IRQCHIP_SKIP_SET_WAKE,
225+
.flags = IRQCHIP_SKIP_SET_WAKE |
226+
IRQCHIP_AFFINITY_PRE_STARTUP,
223227
};
224228

225229
static struct msi_domain_info pci_msi_ir_domain_info = {
@@ -273,7 +277,8 @@ static struct irq_chip dmar_msi_controller = {
273277
.irq_retrigger = irq_chip_retrigger_hierarchy,
274278
.irq_compose_msi_msg = dmar_msi_compose_msg,
275279
.irq_write_msi_msg = dmar_msi_write_msg,
276-
.flags = IRQCHIP_SKIP_SET_WAKE,
280+
.flags = IRQCHIP_SKIP_SET_WAKE |
281+
IRQCHIP_AFFINITY_PRE_STARTUP,
277282
};
278283

279284
static int dmar_msi_init(struct irq_domain *domain,

arch/x86/kernel/hpet.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@ static struct irq_chip hpet_msi_controller __ro_after_init = {
508508
.irq_set_affinity = msi_domain_set_affinity,
509509
.irq_retrigger = irq_chip_retrigger_hierarchy,
510510
.irq_write_msi_msg = hpet_msi_write_msg,
511-
.flags = IRQCHIP_SKIP_SET_WAKE,
511+
.flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_AFFINITY_PRE_STARTUP,
512512
};
513513

514514
static int hpet_msi_init(struct irq_domain *domain,

drivers/base/core.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2837,6 +2837,7 @@ void device_initialize(struct device *dev)
28372837
device_pm_init(dev);
28382838
set_dev_node(dev, -1);
28392839
#ifdef CONFIG_GENERIC_MSI_IRQ
2840+
raw_spin_lock_init(&dev->msi_lock);
28402841
INIT_LIST_HEAD(&dev->msi_list);
28412842
#endif
28422843
INIT_LIST_HEAD(&dev->links.consumers);

drivers/pci/msi.c

Lines changed: 77 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -143,24 +143,25 @@ static inline __attribute_const__ u32 msi_mask(unsigned x)
143143
* reliably as devices without an INTx disable bit will then generate a
144144
* level IRQ which will never be cleared.
145145
*/
146-
u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
146+
void __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
147147
{
148-
u32 mask_bits = desc->masked;
148+
raw_spinlock_t *lock = &desc->dev->msi_lock;
149+
unsigned long flags;
149150

150151
if (pci_msi_ignore_mask || !desc->msi_attrib.maskbit)
151-
return 0;
152+
return;
152153

153-
mask_bits &= ~mask;
154-
mask_bits |= flag;
154+
raw_spin_lock_irqsave(lock, flags);
155+
desc->masked &= ~mask;
156+
desc->masked |= flag;
155157
pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->mask_pos,
156-
mask_bits);
157-
158-
return mask_bits;
158+
desc->masked);
159+
raw_spin_unlock_irqrestore(lock, flags);
159160
}
160161

161162
static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
162163
{
163-
desc->masked = __pci_msi_desc_mask_irq(desc, mask, flag);
164+
__pci_msi_desc_mask_irq(desc, mask, flag);
164165
}
165166

166167
static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
@@ -289,13 +290,31 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
289290
/* Don't touch the hardware now */
290291
} else if (entry->msi_attrib.is_msix) {
291292
void __iomem *base = pci_msix_desc_addr(entry);
293+
bool unmasked = !(entry->masked & PCI_MSIX_ENTRY_CTRL_MASKBIT);
292294

293295
if (!base)
294296
goto skip;
295297

298+
/*
299+
* The specification mandates that the entry is masked
300+
* when the message is modified:
301+
*
302+
* "If software changes the Address or Data value of an
303+
* entry while the entry is unmasked, the result is
304+
* undefined."
305+
*/
306+
if (unmasked)
307+
__pci_msix_desc_mask_irq(entry, PCI_MSIX_ENTRY_CTRL_MASKBIT);
308+
296309
writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
297310
writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
298311
writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
312+
313+
if (unmasked)
314+
__pci_msix_desc_mask_irq(entry, 0);
315+
316+
/* Ensure that the writes are visible in the device */
317+
readl(base + PCI_MSIX_ENTRY_DATA);
299318
} else {
300319
int pos = dev->msi_cap;
301320
u16 msgctl;
@@ -316,6 +335,8 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
316335
pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
317336
msg->data);
318337
}
338+
/* Ensure that the writes are visible in the device */
339+
pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
319340
}
320341

321342
skip:
@@ -636,21 +657,21 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
636657
/* Configure MSI capability structure */
637658
ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
638659
if (ret) {
639-
msi_mask_irq(entry, mask, ~mask);
660+
msi_mask_irq(entry, mask, 0);
640661
free_msi_irqs(dev);
641662
return ret;
642663
}
643664

644665
ret = msi_verify_entries(dev);
645666
if (ret) {
646-
msi_mask_irq(entry, mask, ~mask);
667+
msi_mask_irq(entry, mask, 0);
647668
free_msi_irqs(dev);
648669
return ret;
649670
}
650671

651672
ret = populate_msi_sysfs(dev);
652673
if (ret) {
653-
msi_mask_irq(entry, mask, ~mask);
674+
msi_mask_irq(entry, mask, 0);
654675
free_msi_irqs(dev);
655676
return ret;
656677
}
@@ -691,6 +712,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
691712
{
692713
struct irq_affinity_desc *curmsk, *masks = NULL;
693714
struct msi_desc *entry;
715+
void __iomem *addr;
694716
int ret, i;
695717
int vec_count = pci_msix_vec_count(dev);
696718

@@ -711,6 +733,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
711733

712734
entry->msi_attrib.is_msix = 1;
713735
entry->msi_attrib.is_64 = 1;
736+
714737
if (entries)
715738
entry->msi_attrib.entry_nr = entries[i].entry;
716739
else
@@ -722,6 +745,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
722745
entry->msi_attrib.default_irq = dev->irq;
723746
entry->mask_base = base;
724747

748+
addr = pci_msix_desc_addr(entry);
749+
if (addr)
750+
entry->masked = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
751+
725752
list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
726753
if (masks)
727754
curmsk++;
@@ -732,26 +759,25 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
732759
return ret;
733760
}
734761

735-
static void msix_program_entries(struct pci_dev *dev,
736-
struct msix_entry *entries)
762+
static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries)
737763
{
738764
struct msi_desc *entry;
739-
int i = 0;
740-
void __iomem *desc_addr;
741765

742766
for_each_pci_msi_entry(entry, dev) {
743-
if (entries)
744-
entries[i++].vector = entry->irq;
767+
if (entries) {
768+
entries->vector = entry->irq;
769+
entries++;
770+
}
771+
}
772+
}
745773

746-
desc_addr = pci_msix_desc_addr(entry);
747-
if (desc_addr)
748-
entry->masked = readl(desc_addr +
749-
PCI_MSIX_ENTRY_VECTOR_CTRL);
750-
else
751-
entry->masked = 0;
774+
static void msix_mask_all(void __iomem *base, int tsize)
775+
{
776+
u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
777+
int i;
752778

753-
msix_mask_irq(entry, 1);
754-
}
779+
for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
780+
writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
755781
}
756782

757783
/**
@@ -768,22 +794,33 @@ static void msix_program_entries(struct pci_dev *dev,
768794
static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
769795
int nvec, struct irq_affinity *affd)
770796
{
771-
int ret;
772-
u16 control;
773797
void __iomem *base;
798+
int ret, tsize;
799+
u16 control;
774800

775-
/* Ensure MSI-X is disabled while it is set up */
776-
pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
801+
/*
802+
* Some devices require MSI-X to be enabled before the MSI-X
803+
* registers can be accessed. Mask all the vectors to prevent
804+
* interrupts coming in before they're fully set up.
805+
*/
806+
pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL |
807+
PCI_MSIX_FLAGS_ENABLE);
777808

778809
pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
779810
/* Request & Map MSI-X table region */
780-
base = msix_map_region(dev, msix_table_size(control));
781-
if (!base)
782-
return -ENOMEM;
811+
tsize = msix_table_size(control);
812+
base = msix_map_region(dev, tsize);
813+
if (!base) {
814+
ret = -ENOMEM;
815+
goto out_disable;
816+
}
817+
818+
/* Ensure that all table entries are masked. */
819+
msix_mask_all(base, tsize);
783820

784821
ret = msix_setup_entries(dev, base, entries, nvec, affd);
785822
if (ret)
786-
return ret;
823+
goto out_disable;
787824

788825
ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
789826
if (ret)
@@ -794,15 +831,7 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
794831
if (ret)
795832
goto out_free;
796833

797-
/*
798-
* Some devices require MSI-X to be enabled before we can touch the
799-
* MSI-X registers. We need to mask all the vectors to prevent
800-
* interrupts coming in before they're fully set up.
801-
*/
802-
pci_msix_clear_and_set_ctrl(dev, 0,
803-
PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE);
804-
805-
msix_program_entries(dev, entries);
834+
msix_update_entries(dev, entries);
806835

807836
ret = populate_msi_sysfs(dev);
808837
if (ret)
@@ -836,6 +865,9 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
836865
out_free:
837866
free_msi_irqs(dev);
838867

868+
out_disable:
869+
pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
870+
839871
return ret;
840872
}
841873

@@ -930,8 +962,7 @@ static void pci_msi_shutdown(struct pci_dev *dev)
930962

931963
/* Return the device with MSI unmasked as initial states */
932964
mask = msi_mask(desc->msi_attrib.multi_cap);
933-
/* Keep cached state to be restored */
934-
__pci_msi_desc_mask_irq(desc, mask, ~mask);
965+
msi_mask_irq(desc, mask, 0);
935966

936967
/* Restore dev->irq to its default pin-assertion IRQ */
937968
dev->irq = desc->msi_attrib.default_irq;
@@ -1016,10 +1047,8 @@ static void pci_msix_shutdown(struct pci_dev *dev)
10161047
}
10171048

10181049
/* Return the device with MSI-X masked as initial states */
1019-
for_each_pci_msi_entry(entry, dev) {
1020-
/* Keep cached states to be restored */
1050+
for_each_pci_msi_entry(entry, dev)
10211051
__pci_msix_desc_mask_irq(entry, 1);
1022-
}
10231052

10241053
pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
10251054
pci_intx_for_msi(dev, 1);

include/linux/device.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ struct dev_links_info {
407407
* @em_pd: device's energy model performance domain
408408
* @pins: For device pin management.
409409
* See Documentation/driver-api/pin-control.rst for details.
410+
* @msi_lock: Lock to protect MSI mask cache and mask register
410411
* @msi_list: Hosts MSI descriptors
411412
* @msi_domain: The generic MSI domain this device is using.
412413
* @numa_node: NUMA node this device is close to.
@@ -506,6 +507,7 @@ struct device {
506507
struct dev_pin_info *pins;
507508
#endif
508509
#ifdef CONFIG_GENERIC_MSI_IRQ
510+
raw_spinlock_t msi_lock;
509511
struct list_head msi_list;
510512
#endif
511513
#ifdef CONFIG_DMA_OPS

include/linux/irq.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,7 @@ struct irq_chip {
569569
* IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips
570570
* IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND: Invokes __enable_irq()/__disable_irq() for wake irqs
571571
* in the suspend path if they are in disabled state
572+
* IRQCHIP_AFFINITY_PRE_STARTUP: Default affinity update before startup
572573
*/
573574
enum {
574575
IRQCHIP_SET_TYPE_MASKED = (1 << 0),
@@ -581,6 +582,7 @@ enum {
581582
IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7),
582583
IRQCHIP_SUPPORTS_NMI = (1 << 8),
583584
IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND = (1 << 9),
585+
IRQCHIP_AFFINITY_PRE_STARTUP = (1 << 10),
584586
};
585587

586588
#include <linux/irqdesc.h>

include/linux/msi.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
233233
void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
234234

235235
u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag);
236-
u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag);
236+
void __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag);
237237
void pci_msi_mask_irq(struct irq_data *data);
238238
void pci_msi_unmask_irq(struct irq_data *data);
239239

kernel/irq/chip.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,11 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force)
265265
} else {
266266
switch (__irq_startup_managed(desc, aff, force)) {
267267
case IRQ_STARTUP_NORMAL:
268+
if (d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP)
269+
irq_setup_affinity(desc);
268270
ret = __irq_startup(desc);
269-
irq_setup_affinity(desc);
271+
if (!(d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP))
272+
irq_setup_affinity(desc);
270273
break;
271274
case IRQ_STARTUP_MANAGED:
272275
irq_do_set_affinity(d, aff, false);

0 commit comments

Comments
 (0)