Skip to content

Commit c312828

Browse files
Andrea Righigregkh
authored andcommitted
kernfs: convert kernfs_idr_lock to an irq safe raw spinlock
bpf_cgroup_from_id() is basically a wrapper to cgroup_get_from_id(), that is relying on kernfs to determine the right cgroup associated to the target id. As a kfunc, it has the potential to be attached to any function through BPF, particularly in contexts where certain locks are held. However, kernfs is not using an irq safe spinlock for kernfs_idr_lock, that means any kernfs function that is acquiring this lock can be interrupted and potentially hit bpf_cgroup_from_id() in the process, triggering a deadlock. For example, it is really easy to trigger a lockdep splat between kernfs_idr_lock and rq->_lock, attaching a small BPF program to __set_cpus_allowed_ptr_locked() that just calls bpf_cgroup_from_id(): ===================================================== WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected 6.7.0-rc7-virtme #5 Not tainted ----------------------------------------------------- repro/131 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: ffffffffb2dc4578 (kernfs_idr_lock){+.+.}-{2:2}, at: kernfs_find_and_get_node_by_id+0x1d/0x80 and this task is already holding: ffff911cbecaf218 (&rq->__lock){-.-.}-{2:2}, at: task_rq_lock+0x50/0xc0 which would create a new lock dependency: (&rq->__lock){-.-.}-{2:2} -> (kernfs_idr_lock){+.+.}-{2:2} but this new dependency connects a HARDIRQ-irq-safe lock: (&rq->__lock){-.-.}-{2:2} ... which became HARDIRQ-irq-safe at: lock_acquire+0xbf/0x2b0 _raw_spin_lock_nested+0x2e/0x40 scheduler_tick+0x5d/0x170 update_process_times+0x9c/0xb0 tick_periodic+0x27/0xe0 tick_handle_periodic+0x24/0x70 __sysvec_apic_timer_interrupt+0x64/0x1a0 sysvec_apic_timer_interrupt+0x6f/0x80 asm_sysvec_apic_timer_interrupt+0x1a/0x20 memcpy+0xc/0x20 arch_dup_task_struct+0x15/0x30 copy_process+0x1ce/0x1eb0 kernel_clone+0xac/0x390 kernel_thread+0x6f/0xa0 kthreadd+0x199/0x230 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x1b/0x30 to a HARDIRQ-irq-unsafe lock: (kernfs_idr_lock){+.+.}-{2:2} ... which became HARDIRQ-irq-unsafe at: ... lock_acquire+0xbf/0x2b0 _raw_spin_lock+0x30/0x40 __kernfs_new_node.isra.0+0x83/0x280 kernfs_create_root+0xf6/0x1d0 sysfs_init+0x1b/0x70 mnt_init+0xd9/0x2a0 vfs_caches_init+0xcf/0xe0 start_kernel+0x58a/0x6a0 x86_64_start_reservations+0x18/0x30 x86_64_start_kernel+0xc5/0xe0 secondary_startup_64_no_verify+0x178/0x17b other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(kernfs_idr_lock); local_irq_disable(); lock(&rq->__lock); lock(kernfs_idr_lock); <Interrupt> lock(&rq->__lock); *** DEADLOCK *** Prevent this deadlock condition converting kernfs_idr_lock to a raw irq safe spinlock. The performance impact of this change should be negligible and it also helps to prevent similar deadlock conditions with any other subsystems that may depend on kernfs. Fixes: 332ea1f ("bpf: Add bpf_cgroup_from_id() kfunc") Cc: stable <[email protected]> Signed-off-by: Andrea Righi <[email protected]> Acked-by: Tejun Heo <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 93ec4a3 commit c312828

File tree

1 file changed

+13
-10
lines changed

1 file changed

+13
-10
lines changed

fs/kernfs/dir.c

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ static DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */
2727
*/
2828
static DEFINE_SPINLOCK(kernfs_pr_cont_lock);
2929
static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */
30-
static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
30+
static DEFINE_RAW_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
3131

3232
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
3333

@@ -539,6 +539,7 @@ void kernfs_put(struct kernfs_node *kn)
539539
{
540540
struct kernfs_node *parent;
541541
struct kernfs_root *root;
542+
unsigned long flags;
542543

543544
if (!kn || !atomic_dec_and_test(&kn->count))
544545
return;
@@ -563,9 +564,9 @@ void kernfs_put(struct kernfs_node *kn)
563564
simple_xattrs_free(&kn->iattr->xattrs, NULL);
564565
kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
565566
}
566-
spin_lock(&kernfs_idr_lock);
567+
raw_spin_lock_irqsave(&kernfs_idr_lock, flags);
567568
idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
568-
spin_unlock(&kernfs_idr_lock);
569+
raw_spin_unlock_irqrestore(&kernfs_idr_lock, flags);
569570
kmem_cache_free(kernfs_node_cache, kn);
570571

571572
kn = parent;
@@ -607,6 +608,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
607608
struct kernfs_node *kn;
608609
u32 id_highbits;
609610
int ret;
611+
unsigned long irqflags;
610612

611613
name = kstrdup_const(name, GFP_KERNEL);
612614
if (!name)
@@ -617,13 +619,13 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
617619
goto err_out1;
618620

619621
idr_preload(GFP_KERNEL);
620-
spin_lock(&kernfs_idr_lock);
622+
raw_spin_lock_irqsave(&kernfs_idr_lock, irqflags);
621623
ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC);
622624
if (ret >= 0 && ret < root->last_id_lowbits)
623625
root->id_highbits++;
624626
id_highbits = root->id_highbits;
625627
root->last_id_lowbits = ret;
626-
spin_unlock(&kernfs_idr_lock);
628+
raw_spin_unlock_irqrestore(&kernfs_idr_lock, irqflags);
627629
idr_preload_end();
628630
if (ret < 0)
629631
goto err_out2;
@@ -659,9 +661,9 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
659661
return kn;
660662

661663
err_out3:
662-
spin_lock(&kernfs_idr_lock);
664+
raw_spin_lock_irqsave(&kernfs_idr_lock, irqflags);
663665
idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
664-
spin_unlock(&kernfs_idr_lock);
666+
raw_spin_unlock_irqrestore(&kernfs_idr_lock, irqflags);
665667
err_out2:
666668
kmem_cache_free(kernfs_node_cache, kn);
667669
err_out1:
@@ -714,8 +716,9 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
714716
struct kernfs_node *kn;
715717
ino_t ino = kernfs_id_ino(id);
716718
u32 gen = kernfs_id_gen(id);
719+
unsigned long flags;
717720

718-
spin_lock(&kernfs_idr_lock);
721+
raw_spin_lock_irqsave(&kernfs_idr_lock, flags);
719722

720723
kn = idr_find(&root->ino_idr, (u32)ino);
721724
if (!kn)
@@ -739,10 +742,10 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
739742
if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count)))
740743
goto err_unlock;
741744

742-
spin_unlock(&kernfs_idr_lock);
745+
raw_spin_unlock_irqrestore(&kernfs_idr_lock, flags);
743746
return kn;
744747
err_unlock:
745-
spin_unlock(&kernfs_idr_lock);
748+
raw_spin_unlock_irqrestore(&kernfs_idr_lock, flags);
746749
return NULL;
747750
}
748751

0 commit comments

Comments
 (0)