Skip to content

Commit 87797fa

Browse files
committed
xen/events: replace evtchn_rwlock with RCU
In unprivileged Xen guests event handling can cause a deadlock with Xen
console handling. The evtchn_rwlock and the hvc_lock are taken in
opposite sequence in __hvc_poll() and in Xen console IRQ handling.
Normally this is no problem, as the evtchn_rwlock is taken as a reader
in both paths, but as soon as an event channel is being closed, the lock
will be taken as a writer, which will cause read_lock() to block:

        CPU0                      CPU1                      CPU2
   (IRQ handling)            (__hvc_poll())       (closing event channel)

 read_lock(evtchn_rwlock)
                           spin_lock(hvc_lock)
                                                 write_lock(evtchn_rwlock)
                                                 [blocks]
 spin_lock(hvc_lock)
 [blocks]
                           read_lock(evtchn_rwlock)
                           [blocks due to writer waiting,
                            and not in_interrupt()]

This issue can be avoided by replacing evtchn_rwlock with RCU in
xen_free_irq(). Note that RCU is used only to delay freeing of the
irq_info memory. There is no RCU based dereferencing or replacement of
pointers involved.

In order to avoid potential races between removing the irq_info
reference and handling of interrupts, set the irq_info pointer to NULL
only when freeing its memory. The IRQ itself must be freed at that
time, too, as otherwise the same IRQ number could be allocated again
before handling of the old instance would have been finished.

This is XSA-441 / CVE-2023-34324.

Fixes: 54c9de8 ("xen/events: add a new "late EOI" evtchn framework")
Reported-by: Marek Marczykowski-Górecki <[email protected]>
Signed-off-by: Juergen Gross <[email protected]>
Reviewed-by: Julien Grall <[email protected]>
Signed-off-by: Juergen Gross <[email protected]>
1 parent 94f6f05 commit 87797fa

File tree

1 file changed

+46
-41
lines changed

1 file changed

+46
-41
lines changed

drivers/xen/events/events_base.c

Lines changed: 46 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <linux/slab.h>
3434
#include <linux/irqnr.h>
3535
#include <linux/pci.h>
36+
#include <linux/rcupdate.h>
3637
#include <linux/spinlock.h>
3738
#include <linux/cpuhotplug.h>
3839
#include <linux/atomic.h>
@@ -96,6 +97,7 @@ enum xen_irq_type {
9697
struct irq_info {
9798
struct list_head list;
9899
struct list_head eoi_list;
100+
struct rcu_work rwork;
99101
short refcnt;
100102
u8 spurious_cnt;
101103
u8 is_accounted;
@@ -146,23 +148,13 @@ const struct evtchn_ops *evtchn_ops;
146148
*/
147149
static DEFINE_MUTEX(irq_mapping_update_lock);
148150

149-
/*
150-
* Lock protecting event handling loop against removing event channels.
151-
* Adding of event channels is no issue as the associated IRQ becomes active
152-
* only after everything is setup (before request_[threaded_]irq() the handler
153-
* can't be entered for an event, as the event channel will be unmasked only
154-
* then).
155-
*/
156-
static DEFINE_RWLOCK(evtchn_rwlock);
157-
158151
/*
159152
* Lock hierarchy:
160153
*
161154
* irq_mapping_update_lock
162-
* evtchn_rwlock
163-
* IRQ-desc lock
164-
* percpu eoi_list_lock
165-
* irq_info->lock
155+
* IRQ-desc lock
156+
* percpu eoi_list_lock
157+
* irq_info->lock
166158
*/
167159

168160
static LIST_HEAD(xen_irq_list_head);
@@ -306,6 +298,22 @@ static void channels_on_cpu_inc(struct irq_info *info)
306298
info->is_accounted = 1;
307299
}
308300

301+
static void delayed_free_irq(struct work_struct *work)
302+
{
303+
struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
304+
rwork);
305+
unsigned int irq = info->irq;
306+
307+
/* Remove the info pointer only now, with no potential users left. */
308+
set_info_for_irq(irq, NULL);
309+
310+
kfree(info);
311+
312+
/* Legacy IRQ descriptors are managed by the arch. */
313+
if (irq >= nr_legacy_irqs())
314+
irq_free_desc(irq);
315+
}
316+
309317
/* Constructors for packed IRQ information. */
310318
static int xen_irq_info_common_setup(struct irq_info *info,
311319
unsigned irq,
@@ -668,33 +676,36 @@ static void xen_irq_lateeoi_worker(struct work_struct *work)
668676

669677
eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
670678

671-
read_lock_irqsave(&evtchn_rwlock, flags);
679+
rcu_read_lock();
672680

673681
while (true) {
674-
spin_lock(&eoi->eoi_list_lock);
682+
spin_lock_irqsave(&eoi->eoi_list_lock, flags);
675683

676684
info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
677685
eoi_list);
678686

679-
if (info == NULL || now < info->eoi_time) {
680-
spin_unlock(&eoi->eoi_list_lock);
687+
if (info == NULL)
688+
break;
689+
690+
if (now < info->eoi_time) {
691+
mod_delayed_work_on(info->eoi_cpu, system_wq,
692+
&eoi->delayed,
693+
info->eoi_time - now);
681694
break;
682695
}
683696

684697
list_del_init(&info->eoi_list);
685698

686-
spin_unlock(&eoi->eoi_list_lock);
699+
spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
687700

688701
info->eoi_time = 0;
689702

690703
xen_irq_lateeoi_locked(info, false);
691704
}
692705

693-
if (info)
694-
mod_delayed_work_on(info->eoi_cpu, system_wq,
695-
&eoi->delayed, info->eoi_time - now);
706+
spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
696707

697-
read_unlock_irqrestore(&evtchn_rwlock, flags);
708+
rcu_read_unlock();
698709
}
699710

700711
static void xen_cpu_init_eoi(unsigned int cpu)
@@ -709,16 +720,15 @@ static void xen_cpu_init_eoi(unsigned int cpu)
709720
void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
710721
{
711722
struct irq_info *info;
712-
unsigned long flags;
713723

714-
read_lock_irqsave(&evtchn_rwlock, flags);
724+
rcu_read_lock();
715725

716726
info = info_for_irq(irq);
717727

718728
if (info)
719729
xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
720730

721-
read_unlock_irqrestore(&evtchn_rwlock, flags);
731+
rcu_read_unlock();
722732
}
723733
EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
724734

@@ -732,6 +742,7 @@ static void xen_irq_init(unsigned irq)
732742

733743
info->type = IRQT_UNBOUND;
734744
info->refcnt = -1;
745+
INIT_RCU_WORK(&info->rwork, delayed_free_irq);
735746

736747
set_info_for_irq(irq, info);
737748
/*
@@ -789,31 +800,18 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
789800
static void xen_free_irq(unsigned irq)
790801
{
791802
struct irq_info *info = info_for_irq(irq);
792-
unsigned long flags;
793803

794804
if (WARN_ON(!info))
795805
return;
796806

797-
write_lock_irqsave(&evtchn_rwlock, flags);
798-
799807
if (!list_empty(&info->eoi_list))
800808
lateeoi_list_del(info);
801809

802810
list_del(&info->list);
803811

804-
set_info_for_irq(irq, NULL);
805-
806812
WARN_ON(info->refcnt > 0);
807813

808-
write_unlock_irqrestore(&evtchn_rwlock, flags);
809-
810-
kfree(info);
811-
812-
/* Legacy IRQ descriptors are managed by the arch. */
813-
if (irq < nr_legacy_irqs())
814-
return;
815-
816-
irq_free_desc(irq);
814+
queue_rcu_work(system_wq, &info->rwork);
817815
}
818816

819817
/* Not called for lateeoi events. */
@@ -1711,7 +1709,14 @@ int xen_evtchn_do_upcall(void)
17111709
int cpu = smp_processor_id();
17121710
struct evtchn_loop_ctrl ctrl = { 0 };
17131711

1714-
read_lock(&evtchn_rwlock);
1712+
/*
1713+
* When closing an event channel the associated IRQ must not be freed
1714+
* until all cpus have left the event handling loop. This is ensured
1715+
* by taking the rcu_read_lock() while handling events, as freeing of
1716+
* the IRQ is handled via queue_rcu_work() _after_ closing the event
1717+
* channel.
1718+
*/
1719+
rcu_read_lock();
17151720

17161721
do {
17171722
vcpu_info->evtchn_upcall_pending = 0;
@@ -1724,7 +1729,7 @@ int xen_evtchn_do_upcall(void)
17241729

17251730
} while (vcpu_info->evtchn_upcall_pending);
17261731

1727-
read_unlock(&evtchn_rwlock);
1732+
rcu_read_unlock();
17281733

17291734
/*
17301735
* Increment irq_epoch only now to defer EOIs only for

0 commit comments

Comments
 (0)