Commit cd7bdd9

anakryiko authored and Peter Zijlstra committed
uprobes: perform lockless SRCU-protected uprobes_tree lookup
Another big bottleneck to scalability is uprobes_treelock, which is taken in a very hot path in handle_swbp(). Now that uprobes are SRCU-protected, take advantage of that and make the uprobes_tree RB-tree lookup lockless.

To make the RCU-protected lockless RB-tree lookup correct, we need to take into account that such a lookup can return false negatives if there are parallel RB-tree modifications (rotations) going on. We use a seqcount lock to detect whether the RB-tree changed, and if we find nothing while the RB-tree got modified in between, we just retry. If a uprobe was found, then it's guaranteed to be a correct lookup.

With all the lock-avoiding changes done, we get a pretty decent improvement in performance and scalability of uprobes with the number of CPUs, even though we are still nowhere near linear scalability. This is due to SRCU not really scaling very well with the number of CPUs on the particular hardware used for testing (80-core Intel Xeon Gold 6138 CPU @ 2.00GHz), but also due to the remaining mmap_lock, which is currently taken to resolve the interrupt address to inode+offset and then to a uprobe instance. And, of course, uretprobes still need similar RCU treatment to avoid refcounting in the hot path, which will be addressed in follow-up patches.

Nevertheless, the improvement is good. We used the BPF selftests-based uprobe-nop and uretprobe-nop benchmarks to get the numbers below, varying the number of CPUs on which uprobes and uretprobes are triggered.

BASELINE
========
uprobe-nop      ( 1 cpus):    3.032 ± 0.023M/s  (  3.032M/s/cpu)
uprobe-nop      ( 2 cpus):    3.452 ± 0.005M/s  (  1.726M/s/cpu)
uprobe-nop      ( 4 cpus):    3.663 ± 0.005M/s  (  0.916M/s/cpu)
uprobe-nop      ( 8 cpus):    3.718 ± 0.038M/s  (  0.465M/s/cpu)
uprobe-nop      (16 cpus):    3.344 ± 0.008M/s  (  0.209M/s/cpu)
uprobe-nop      (32 cpus):    2.288 ± 0.021M/s  (  0.071M/s/cpu)
uprobe-nop      (64 cpus):    3.205 ± 0.004M/s  (  0.050M/s/cpu)

uretprobe-nop   ( 1 cpus):    1.979 ± 0.005M/s  (  1.979M/s/cpu)
uretprobe-nop   ( 2 cpus):    2.361 ± 0.005M/s  (  1.180M/s/cpu)
uretprobe-nop   ( 4 cpus):    2.309 ± 0.002M/s  (  0.577M/s/cpu)
uretprobe-nop   ( 8 cpus):    2.253 ± 0.001M/s  (  0.282M/s/cpu)
uretprobe-nop   (16 cpus):    2.007 ± 0.000M/s  (  0.125M/s/cpu)
uretprobe-nop   (32 cpus):    1.624 ± 0.003M/s  (  0.051M/s/cpu)
uretprobe-nop   (64 cpus):    2.149 ± 0.001M/s  (  0.034M/s/cpu)

SRCU CHANGES
============
uprobe-nop      ( 1 cpus):    3.276 ± 0.005M/s  (  3.276M/s/cpu)
uprobe-nop      ( 2 cpus):    4.125 ± 0.002M/s  (  2.063M/s/cpu)
uprobe-nop      ( 4 cpus):    7.713 ± 0.002M/s  (  1.928M/s/cpu)
uprobe-nop      ( 8 cpus):    8.097 ± 0.006M/s  (  1.012M/s/cpu)
uprobe-nop      (16 cpus):    6.501 ± 0.056M/s  (  0.406M/s/cpu)
uprobe-nop      (32 cpus):    4.398 ± 0.084M/s  (  0.137M/s/cpu)
uprobe-nop      (64 cpus):    6.452 ± 0.000M/s  (  0.101M/s/cpu)

uretprobe-nop   ( 1 cpus):    2.055 ± 0.001M/s  (  2.055M/s/cpu)
uretprobe-nop   ( 2 cpus):    2.677 ± 0.000M/s  (  1.339M/s/cpu)
uretprobe-nop   ( 4 cpus):    4.561 ± 0.003M/s  (  1.140M/s/cpu)
uretprobe-nop   ( 8 cpus):    5.291 ± 0.002M/s  (  0.661M/s/cpu)
uretprobe-nop   (16 cpus):    5.065 ± 0.019M/s  (  0.317M/s/cpu)
uretprobe-nop   (32 cpus):    3.622 ± 0.003M/s  (  0.113M/s/cpu)
uretprobe-nop   (64 cpus):    3.723 ± 0.002M/s  (  0.058M/s/cpu)

Peak throughput increased from 3.7 mln/s (uprobe triggerings) up to about 8 mln/s. For uretprobes it's a bit more modest, with a bump from 2.4 mln/s to 5 mln/s.

Suggested-by: "Peter Zijlstra (Intel)" <[email protected]>
Signed-off-by: Andrii Nakryiko <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Oleg Nesterov <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
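To make the retry logic concrete, here is a minimal, self-contained userspace sketch of the seqcount pattern the lookup relies on. It is an illustrative analogue, not the kernel code: lookup(), lookup_nolock(), update() and the seq/tree_lock names are hypothetical, with a C11 atomic counter and a pthread mutex standing in for seqcount_rwlock_t and uprobes_treelock.

/*
 * Illustrative userspace analogue of the lockless-lookup scheme; not
 * part of the patch. A plain atomic counter plus a mutex stand in for
 * seqcount_rwlock_t and uprobes_treelock, and lookup_nolock() is a
 * hypothetical stand-in for rb_find_rcu().
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

static atomic_uint seq;         /* even: stable, odd: write in progress */
static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;

static unsigned int read_seq_begin(void)
{
        unsigned int s;

        /* wait out any in-flight writer (odd count) */
        while ((s = atomic_load_explicit(&seq, memory_order_acquire)) & 1)
                ;
        return s;
}

static bool read_seq_retry(unsigned int s)
{
        /* a changed count means a writer ran; the lookup may have raced */
        return atomic_load_explicit(&seq, memory_order_acquire) != s;
}

static void write_seq_begin(void)
{
        pthread_mutex_lock(&tree_lock);
        atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* odd */
}

static void write_seq_end(void)
{
        atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* even */
        pthread_mutex_unlock(&tree_lock);
}

/* stand-in for a lockless tree search that can miss during rotations */
static void *lookup_nolock(long key)
{
        (void)key;
        return NULL;
}

/* writer side, mirroring insert_uprobe()/put_uprobe() in the patch */
void update(void (*modify_tree)(void))
{
        write_seq_begin();
        modify_tree();  /* e.g. rb_erase()/rb_find_add_rcu() in the kernel */
        write_seq_end();
}

/* reader side, mirroring find_uprobe_rcu() in the patch */
void *lookup(long key)
{
        unsigned int s;
        void *obj;

        do {
                s = read_seq_begin();
                obj = lookup_nolock(key);
                if (obj)
                        return obj;     /* a hit is always a true hit */
        } while (read_seq_retry(s));    /* miss + concurrent write: retry */

        return NULL;    /* miss with an unchanged count: truly absent */
}

The property the patch's comment describes holds here too: a hit needs no validation, while a miss is trusted only if no writer ran between read_seq_begin() and read_seq_retry().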
1 parent 50a3803 commit cd7bdd9


kernel/events/uprobes.c

Lines changed: 24 additions & 6 deletions
@@ -40,6 +40,7 @@ static struct rb_root uprobes_tree = RB_ROOT;
 #define no_uprobe_events()      RB_EMPTY_ROOT(&uprobes_tree)
 
 static DEFINE_RWLOCK(uprobes_treelock);  /* serialize rbtree access */
+static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock);
 
 DEFINE_STATIC_SRCU(uprobes_srcu);
 
@@ -634,8 +635,11 @@ static void put_uprobe(struct uprobe *uprobe)
 
         write_lock(&uprobes_treelock);
 
-        if (uprobe_is_active(uprobe))
+        if (uprobe_is_active(uprobe)) {
+                write_seqcount_begin(&uprobes_seqcount);
                 rb_erase(&uprobe->rb_node, &uprobes_tree);
+                write_seqcount_end(&uprobes_seqcount);
+        }
 
         write_unlock(&uprobes_treelock);
 
@@ -701,14 +705,26 @@ static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset)
                 .offset = offset,
         };
         struct rb_node *node;
+        unsigned int seq;
 
         lockdep_assert(srcu_read_lock_held(&uprobes_srcu));
 
-        read_lock(&uprobes_treelock);
-        node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key);
-        read_unlock(&uprobes_treelock);
+        do {
+                seq = read_seqcount_begin(&uprobes_seqcount);
+                node = rb_find_rcu(&key, &uprobes_tree, __uprobe_cmp_key);
+                /*
+                 * Lockless RB-tree lookups can result only in false negatives.
+                 * If the element is found, it is correct and can be returned
+                 * under RCU protection. If we find nothing, we need to
+                 * validate that seqcount didn't change. If it did, we have to
+                 * try again as we might have missed the element (false
+                 * negative). If seqcount is unchanged, search truly failed.
+                 */
+                if (node)
+                        return __node_2_uprobe(node);
+        } while (read_seqcount_retry(&uprobes_seqcount, seq));
 
-        return node ? __node_2_uprobe(node) : NULL;
+        return NULL;
 }
 
 /*
@@ -730,7 +746,7 @@ static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
 {
         struct rb_node *node;
 again:
-        node = rb_find_add(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp);
+        node = rb_find_add_rcu(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp);
         if (node) {
                 struct uprobe *u = __node_2_uprobe(node);
 
@@ -755,7 +771,9 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
         struct uprobe *u;
 
         write_lock(&uprobes_treelock);
+        write_seqcount_begin(&uprobes_seqcount);
         u = __insert_uprobe(uprobe);
+        write_seqcount_end(&uprobes_seqcount);
         write_unlock(&uprobes_treelock);
 
         return u;
