Skip to content

Commit 8617408

Browse files
anakryiko authored and Peter Zijlstra committed
uprobes: protect uprobe lifetime with SRCU
To avoid unnecessarily taking a (brief) refcount on uprobe during breakpoint handling in handle_swbp() for entry uprobes, make find_uprobe() not take refcount, but protect the lifetime of a uprobe instance with RCU. This improves scalability, as refcount gets quite expensive due to cache line bouncing between multiple CPUs.

Specifically, we utilize our own uprobe-specific SRCU instance for this RCU protection. put_uprobe() will delay actual kfree() using call_srcu().

For now, uretprobe and single-stepping handling will still acquire refcount as necessary. We'll address these issues in follow-up patches by making them use SRCU with timeout.

Signed-off-by: Andrii Nakryiko <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Oleg Nesterov <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 3f7f1a6 commit 8617408

File tree

1 file changed

+54
-40
lines changed

1 file changed

+54
-40
lines changed

kernel/events/uprobes.c

Lines changed: 54 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ static struct rb_root uprobes_tree = RB_ROOT;
4141

4242
static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */
4343

44+
DEFINE_STATIC_SRCU(uprobes_srcu);
45+
4446
#define UPROBES_HASH_SZ 13
4547
/* serialize uprobe->pending_list */
4648
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
@@ -59,6 +61,7 @@ struct uprobe {
5961
struct list_head pending_list;
6062
struct uprobe_consumer *consumers;
6163
struct inode *inode; /* Also hold a ref to inode */
64+
struct rcu_head rcu;
6265
loff_t offset;
6366
loff_t ref_ctr_offset;
6467
unsigned long flags;
@@ -617,6 +620,13 @@ static inline bool uprobe_is_active(struct uprobe *uprobe)
617620
return !RB_EMPTY_NODE(&uprobe->rb_node);
618621
}
619622

623+
/*
 * Deferred-free callback for a uprobe.
 *
 * @rcu: the rcu_head embedded in the struct uprobe being released.
 *
 * Recovers the enclosing struct uprobe from @rcu and kfree()s it.
 * put_uprobe() queues this callback via call_srcu() on uprobes_srcu
 * instead of freeing the uprobe directly.
 */
static void uprobe_free_rcu(struct rcu_head *rcu)
624+
{
625+
struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
626+
627+
kfree(uprobe);
628+
}
629+
620630
static void put_uprobe(struct uprobe *uprobe)
621631
{
622632
if (!refcount_dec_and_test(&uprobe->ref))
@@ -638,7 +648,7 @@ static void put_uprobe(struct uprobe *uprobe)
638648
delayed_uprobe_remove(uprobe, NULL);
639649
mutex_unlock(&delayed_uprobe_lock);
640650

641-
kfree(uprobe);
651+
call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu);
642652
}
643653

644654
static __always_inline
@@ -680,33 +690,25 @@ static inline int __uprobe_cmp(struct rb_node *a, const struct rb_node *b)
680690
return uprobe_cmp(u->inode, u->offset, __node_2_uprobe(b));
681691
}
682692

683-
static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
693+
/*
694+
* Assumes being inside RCU protected region.
695+
* No refcount is taken on returned uprobe.
696+
*/
697+
static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset)
684698
{
685699
struct __uprobe_key key = {
686700
.inode = inode,
687701
.offset = offset,
688702
};
689-
struct rb_node *node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key);
690-
691-
if (node)
692-
return try_get_uprobe(__node_2_uprobe(node));
693-
694-
return NULL;
695-
}
703+
struct rb_node *node;
696704

697-
/*
698-
* Find a uprobe corresponding to a given inode:offset
699-
* Acquires uprobes_treelock
700-
*/
701-
static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
702-
{
703-
struct uprobe *uprobe;
705+
lockdep_assert(srcu_read_lock_held(&uprobes_srcu));
704706

705707
read_lock(&uprobes_treelock);
706-
uprobe = __find_uprobe(inode, offset);
708+
node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key);
707709
read_unlock(&uprobes_treelock);
708710

709-
return uprobe;
711+
return node ? __node_2_uprobe(node) : NULL;
710712
}
711713

712714
/*
@@ -1080,10 +1082,10 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
10801082
goto free;
10811083
/*
10821084
* We take mmap_lock for writing to avoid the race with
1083-
* find_active_uprobe() which takes mmap_lock for reading.
1085+
* find_active_uprobe_rcu() which takes mmap_lock for reading.
10841086
* Thus this install_breakpoint() can not make
1085-
* is_trap_at_addr() true right after find_uprobe()
1086-
* returns NULL in find_active_uprobe().
1087+
* is_trap_at_addr() true right after find_uprobe_rcu()
1088+
* returns NULL in find_active_uprobe_rcu().
10871089
*/
10881090
mmap_write_lock(mm);
10891091
vma = find_vma(mm, info->vaddr);
@@ -1884,9 +1886,13 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
18841886
return;
18851887
}
18861888

1889+
/* we need to bump refcount to store uprobe in utask */
1890+
if (!try_get_uprobe(uprobe))
1891+
return;
1892+
18871893
ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
18881894
if (!ri)
1889-
return;
1895+
goto fail;
18901896

18911897
trampoline_vaddr = uprobe_get_trampoline_vaddr();
18921898
orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
@@ -1913,11 +1919,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
19131919
}
19141920
orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
19151921
}
1916-
/*
1917-
* uprobe's refcnt is positive, held by caller, so it's safe to
1918-
* unconditionally bump it one more time here
1919-
*/
1920-
ri->uprobe = get_uprobe(uprobe);
1922+
ri->uprobe = uprobe;
19211923
ri->func = instruction_pointer(regs);
19221924
ri->stack = user_stack_pointer(regs);
19231925
ri->orig_ret_vaddr = orig_ret_vaddr;
@@ -1928,8 +1930,9 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
19281930
utask->return_instances = ri;
19291931

19301932
return;
1931-
fail:
1933+
fail:
19321934
kfree(ri);
1935+
put_uprobe(uprobe);
19331936
}
19341937

19351938
/* Prepare to single-step probed instruction out of line. */
@@ -1944,22 +1947,30 @@ pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
19441947
if (!utask)
19451948
return -ENOMEM;
19461949

1950+
if (!try_get_uprobe(uprobe))
1951+
return -EINVAL;
1952+
19471953
xol_vaddr = xol_get_insn_slot(uprobe);
1948-
if (!xol_vaddr)
1949-
return -ENOMEM;
1954+
if (!xol_vaddr) {
1955+
err = -ENOMEM;
1956+
goto err_out;
1957+
}
19501958

19511959
utask->xol_vaddr = xol_vaddr;
19521960
utask->vaddr = bp_vaddr;
19531961

19541962
err = arch_uprobe_pre_xol(&uprobe->arch, regs);
19551963
if (unlikely(err)) {
19561964
xol_free_insn_slot(current);
1957-
return err;
1965+
goto err_out;
19581966
}
19591967

19601968
utask->active_uprobe = uprobe;
19611969
utask->state = UTASK_SSTEP;
19621970
return 0;
1971+
err_out:
1972+
put_uprobe(uprobe);
1973+
return err;
19631974
}
19641975

19651976
/*
@@ -2043,7 +2054,8 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
20432054
return is_trap_insn(&opcode);
20442055
}
20452056

2046-
static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
2057+
/* assumes being inside RCU protected region */
2058+
static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swbp)
20472059
{
20482060
struct mm_struct *mm = current->mm;
20492061
struct uprobe *uprobe = NULL;
@@ -2056,7 +2068,7 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
20562068
struct inode *inode = file_inode(vma->vm_file);
20572069
loff_t offset = vaddr_to_offset(vma, bp_vaddr);
20582070

2059-
uprobe = find_uprobe(inode, offset);
2071+
uprobe = find_uprobe_rcu(inode, offset);
20602072
}
20612073

20622074
if (!uprobe)
@@ -2202,13 +2214,15 @@ static void handle_swbp(struct pt_regs *regs)
22022214
{
22032215
struct uprobe *uprobe;
22042216
unsigned long bp_vaddr;
2205-
int is_swbp;
2217+
int is_swbp, srcu_idx;
22062218

22072219
bp_vaddr = uprobe_get_swbp_addr(regs);
22082220
if (bp_vaddr == uprobe_get_trampoline_vaddr())
22092221
return uprobe_handle_trampoline(regs);
22102222

2211-
uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
2223+
srcu_idx = srcu_read_lock(&uprobes_srcu);
2224+
2225+
uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
22122226
if (!uprobe) {
22132227
if (is_swbp > 0) {
22142228
/* No matching uprobe; signal SIGTRAP. */
@@ -2224,7 +2238,7 @@ static void handle_swbp(struct pt_regs *regs)
22242238
*/
22252239
instruction_pointer_set(regs, bp_vaddr);
22262240
}
2227-
return;
2241+
goto out;
22282242
}
22292243

22302244
/* change it in advance for ->handler() and restart */
@@ -2259,12 +2273,12 @@ static void handle_swbp(struct pt_regs *regs)
22592273
if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
22602274
goto out;
22612275

2262-
if (!pre_ssout(uprobe, regs, bp_vaddr))
2263-
return;
2276+
if (pre_ssout(uprobe, regs, bp_vaddr))
2277+
goto out;
22642278

2265-
/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
22662279
out:
2267-
put_uprobe(uprobe);
2280+
/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
2281+
srcu_read_unlock(&uprobes_srcu, srcu_idx);
22682282
}
22692283

22702284
/*

0 commit comments

Comments
 (0)