Skip to content

Commit 9154244

Browse files
joelagnel authored and paulmckrcu committed
rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching
To reduce the number of grace periods and improve kfree() performance, we recently introduced batching, which dramatically brings down the number of grace periods while giving us the ability to use kfree_bulk() for efficient kfree'ing. However, this has increased the likelihood of an OOM condition under a heavy kfree_rcu() flood on small-memory systems.

This patch introduces a shrinker that starts grace periods right away if the system is under memory pressure due to the existence of objects that have not yet started a grace period.

With this patch, I no longer observe an OOM on a system with 512MB RAM and 8 CPUs, with the following rcuperf options: rcuperf.kfree_loops=20000 rcuperf.kfree_alloc_num=8000 rcuperf.kfree_rcu_test=1 rcuperf.kfree_mult=2. Without the patch, the system easily OOMs with the above parameters.

NOTE: 1. On systems with no memory pressure, the patch has no effect, as intended. 2. In the future, we can use this same mechanism to prevent grace periods from happening even more, by relying on shrinkers carefully.

Cc: [email protected] Signed-off-by: Joel Fernandes (Google) <[email protected]> Signed-off-by: Paul E. McKenney <[email protected]>
1 parent f87dc80 commit 9154244

File tree

1 file changed

+60
-0
lines changed

1 file changed

+60
-0
lines changed

kernel/rcu/tree.c

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2824,6 +2824,8 @@ struct kfree_rcu_cpu {
28242824
struct delayed_work monitor_work;
28252825
bool monitor_todo;
28262826
bool initialized;
2827+
// Number of objects for which GP not started
2828+
int count;
28272829
};
28282830

28292831
static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
@@ -2937,6 +2939,8 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
29372939
krcp->head = NULL;
29382940
}
29392941

2942+
krcp->count = 0;
2943+
29402944
/*
29412945
* One work is per one batch, so there are two "free channels",
29422946
* "bhead_free" and "head_free" the batch can handle. It can be
@@ -3073,6 +3077,8 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
30733077
krcp->head = head;
30743078
}
30753079

3080+
krcp->count++;
3081+
30763082
// Set timer to drain after KFREE_DRAIN_JIFFIES.
30773083
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
30783084
!krcp->monitor_todo) {
@@ -3087,6 +3093,58 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
30873093
}
30883094
EXPORT_SYMBOL_GPL(kfree_call_rcu);
30893095

3096+
static unsigned long
3097+
kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
3098+
{
3099+
int cpu;
3100+
unsigned long flags, count = 0;
3101+
3102+
/* Snapshot count of all CPUs */
3103+
for_each_online_cpu(cpu) {
3104+
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
3105+
3106+
spin_lock_irqsave(&krcp->lock, flags);
3107+
count += krcp->count;
3108+
spin_unlock_irqrestore(&krcp->lock, flags);
3109+
}
3110+
3111+
return count;
3112+
}
3113+
3114+
static unsigned long
3115+
kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
3116+
{
3117+
int cpu, freed = 0;
3118+
unsigned long flags;
3119+
3120+
for_each_online_cpu(cpu) {
3121+
int count;
3122+
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
3123+
3124+
count = krcp->count;
3125+
spin_lock_irqsave(&krcp->lock, flags);
3126+
if (krcp->monitor_todo)
3127+
kfree_rcu_drain_unlock(krcp, flags);
3128+
else
3129+
spin_unlock_irqrestore(&krcp->lock, flags);
3130+
3131+
sc->nr_to_scan -= count;
3132+
freed += count;
3133+
3134+
if (sc->nr_to_scan <= 0)
3135+
break;
3136+
}
3137+
3138+
return freed;
3139+
}
3140+
3141+
static struct shrinker kfree_rcu_shrinker = {
3142+
.count_objects = kfree_rcu_shrink_count,
3143+
.scan_objects = kfree_rcu_shrink_scan,
3144+
.batch = 0,
3145+
.seeks = DEFAULT_SEEKS,
3146+
};
3147+
30903148
void __init kfree_rcu_scheduler_running(void)
30913149
{
30923150
int cpu;
@@ -4007,6 +4065,8 @@ static void __init kfree_rcu_batch_init(void)
40074065
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
40084066
krcp->initialized = true;
40094067
}
4068+
if (register_shrinker(&kfree_rcu_shrinker))
4069+
pr_err("Failed to register kfree_rcu() shrinker!\n");
40104070
}
40114071

40124072
void __init rcu_init(void)

0 commit comments

Comments
 (0)