Skip to content

Commit 981780c

Browse files
committed
hw/i386/pc: Fix level interrupt sharing for Xen event channel GSI
The system GSIs are not designed for sharing. One device might assert a shared interrupt with qemu_set_irq() and another might deassert it, and the level from the first device is lost. This could be solved by refactoring the x86 GSI code to use an OrIrq device, but that still wouldn't be ideal. The best answer would be to have a 'resample' callback which is invoked when the interrupt is acked at the interrupt controller, and causes the devices to re-trigger the interrupt if it should still be pending. This is the model that VFIO in Linux uses, with a 'resampler' eventfd that actually unmasks the interrupt on the hardware device and thus triggers a new interrupt from it if needed. As things stand, QEMU currently doesn't use that VFIO interface correctly, and just bashes on the resampler for every MMIO access to the device "just in case". Which requires unmapping and trapping the MMIO while an interrupt is pending! For the Xen callback GSI, QEMU does something similar — a flag is set which triggers a poll on *every* vmexst to see if the GSI should be deasserted. Proper resampler support would solve all of that, but is a task for later which has already been on the TODO list for a while. Since the Xen event channel GSI support *already* has hooks into the PC gsi_handler() code for routing GSIs to PIRQs, we can use that for a simpler bug fix. So... remember the externally-driven state of the line (from e.g. PCI INTx) and set the logical OR of that with the GSI. As a bonus, we now only need to enable the polling of vcpu_info on vmexit if the Xen callback GSI is the *only* reason the corresponding line is asserted. Closes: https://gitlab.com/qemu-project/qemu/-/issues/2731 Fixes: ddf0fd9 ("hw/xen: Support HVM_PARAM_CALLBACK_TYPE_GSI callback") Reported-by: Thomas Huth <[email protected]> Signed-off-by: David Woodhouse <[email protected]> Acked-by: Michael S. Tsirkin <[email protected]>
1 parent 3f8bcbb commit 981780c

File tree

3 files changed

+69
-25
lines changed

3 files changed

+69
-25
lines changed

hw/i386/kvm/xen_evtchn.c

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ struct XenEvtchnState {
140140

141141
uint64_t callback_param;
142142
bool evtchn_in_kernel;
143+
bool setting_callback_gsi;
144+
int extern_gsi_level;
143145
uint32_t callback_gsi;
144146

145147
QEMUBH *gsi_bh;
@@ -431,9 +433,22 @@ void xen_evtchn_set_callback_level(int level)
431433
}
432434

433435
if (s->callback_gsi && s->callback_gsi < s->nr_callback_gsis) {
434-
qemu_set_irq(s->callback_gsis[s->callback_gsi], level);
435-
if (level) {
436-
/* Ensure the vCPU polls for deassertion */
436+
/*
437+
* Ugly, but since we hold the BQL we can set this flag so that
438+
* xen_evtchn_set_gsi() can tell the difference between this code
439+
* setting the GSI, and an external device (PCI INTx) doing so.
440+
*/
441+
s->setting_callback_gsi = true;
442+
/* Do not deassert the line if an external device is asserting it. */
443+
qemu_set_irq(s->callback_gsis[s->callback_gsi],
444+
level || s->extern_gsi_level);
445+
s->setting_callback_gsi = false;
446+
447+
/*
448+
* If the callback GSI is the only one asserted, ensure the status
449+
* is polled for deassertion in kvm_arch_post_run().
450+
*/
451+
if (level && !s->extern_gsi_level) {
437452
kvm_xen_set_callback_asserted();
438453
}
439454
}
@@ -1596,7 +1611,7 @@ static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
15961611
return pirq;
15971612
}
15981613

1599-
bool xen_evtchn_set_gsi(int gsi, int level)
1614+
bool xen_evtchn_set_gsi(int gsi, int *level)
16001615
{
16011616
XenEvtchnState *s = xen_evtchn_singleton;
16021617
int pirq;
@@ -1608,16 +1623,35 @@ bool xen_evtchn_set_gsi(int gsi, int level)
16081623
}
16091624

16101625
/*
1611-
* Check that that it *isn't* the event channel GSI, and thus
1612-
* that we are not recursing and it's safe to take s->port_lock.
1613-
*
1614-
* Locking aside, it's perfectly sane to bail out early for that
1615-
* special case, as it would make no sense for the event channel
1616-
* GSI to be routed back to event channels, when the delivery
1617-
* method is to raise the GSI... that recursion wouldn't *just*
1618-
* be a locking issue.
1626+
* For the callback_gsi we need to implement a logical OR of the event
1627+
* channel GSI and the external input (e.g. from PCI INTx), because
1628+
* QEMU itself doesn't support shared level interrupts via demux or
1629+
* resamplers.
16191630
*/
16201631
if (gsi && gsi == s->callback_gsi) {
1632+
/* Remember the external state of the GSI pin (e.g. from PCI INTx) */
1633+
if (!s->setting_callback_gsi) {
1634+
s->extern_gsi_level = *level;
1635+
1636+
/*
1637+
* Don't allow the external device to deassert the line if the
1638+
* eveht channel GSI should still be asserted.
1639+
*/
1640+
if (!s->extern_gsi_level) {
1641+
struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
1642+
if (vi && vi->evtchn_upcall_pending) {
1643+
/* Need to poll for deassertion */
1644+
kvm_xen_set_callback_asserted();
1645+
*level = 1;
1646+
}
1647+
}
1648+
}
1649+
1650+
/*
1651+
* The event channel GSI cannot be routed to PIRQ, as that would make
1652+
* no sense. It could also deadlock on s->port_lock, if we proceed.
1653+
* So bail out now.
1654+
*/
16211655
return false;
16221656
}
16231657

@@ -1628,7 +1662,7 @@ bool xen_evtchn_set_gsi(int gsi, int level)
16281662
return false;
16291663
}
16301664

1631-
if (level) {
1665+
if (*level) {
16321666
int port = s->pirq[pirq].port;
16331667

16341668
s->pirq_gsi_set |= (1U << gsi);

hw/i386/kvm/xen_evtchn.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ void xen_evtchn_set_callback_level(int level);
2323

2424
int xen_evtchn_set_port(uint16_t port);
2525

26-
bool xen_evtchn_set_gsi(int gsi, int level);
26+
bool xen_evtchn_set_gsi(int gsi, int *level);
2727
void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
2828
uint64_t addr, uint32_t data, bool is_masked);
2929
void xen_evtchn_remove_pci_device(PCIDevice *dev);

hw/i386/x86-common.c

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -450,8 +450,27 @@ static long get_file_size(FILE *f)
450450
void gsi_handler(void *opaque, int n, int level)
451451
{
452452
GSIState *s = opaque;
453+
bool bypass_ioapic = false;
453454

454455
trace_x86_gsi_interrupt(n, level);
456+
457+
#ifdef CONFIG_XEN_EMU
458+
/*
459+
* Xen delivers the GSI to the Legacy PIC (not that Legacy PIC
460+
* routing actually works properly under Xen). And then to
461+
* *either* the PIRQ handling or the I/OAPIC depending on whether
462+
* the former wants it.
463+
*
464+
* Additionally, this hook allows the Xen event channel GSI to
465+
* work around QEMU's lack of support for shared level interrupts,
466+
* by keeping track of the externally driven state of the pin and
467+
* implementing a logical OR with the state of the evtchn GSI.
468+
*/
469+
if (xen_mode == XEN_EMULATE) {
470+
bypass_ioapic = xen_evtchn_set_gsi(n, &level);
471+
}
472+
#endif
473+
455474
switch (n) {
456475
case 0 ... ISA_NUM_IRQS - 1:
457476
if (s->i8259_irq[n]) {
@@ -460,18 +479,9 @@ void gsi_handler(void *opaque, int n, int level)
460479
}
461480
/* fall through */
462481
case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1:
463-
#ifdef CONFIG_XEN_EMU
464-
/*
465-
* Xen delivers the GSI to the Legacy PIC (not that Legacy PIC
466-
* routing actually works properly under Xen). And then to
467-
* *either* the PIRQ handling or the I/OAPIC depending on
468-
* whether the former wants it.
469-
*/
470-
if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) {
471-
break;
482+
if (!bypass_ioapic) {
483+
qemu_set_irq(s->ioapic_irq[n], level);
472484
}
473-
#endif
474-
qemu_set_irq(s->ioapic_irq[n], level);
475485
break;
476486
case IO_APIC_SECONDARY_IRQBASE
477487
... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1:

0 commit comments

Comments
 (0)