
Commit 0392beb

joelagnel authored and paulmckrcu committed
rcu: Add multiple in-flight batches of kfree_rcu() work
During testing, it was observed that the amount of memory consumed due to kfree_rcu() batching is 300-400MB. Previously we had only a single head_free pointer pointing to the list of rcu_head(s) that are to be freed after a grace period. Until this list is drained, we cannot queue any more objects on it, since such objects may not be ready to be reclaimed when the worker thread eventually gets to draining the head_free list.

We can do better by maintaining multiple lists, as done by this patch. Testing shows that memory consumption came down by around 100-150MB with just one additional list. Adding more than one additional list did not show any further improvement.

Suggested-by: Paul E. McKenney <[email protected]>
Signed-off-by: Joel Fernandes (Google) <[email protected]>
[ paulmck: Code style and initialization handling. ]
[ paulmck: Fix field name, reported by kbuild test robot <[email protected]>. ]
Signed-off-by: Paul E. McKenney <[email protected]>
1 parent 569d767 commit 0392beb
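
To make the idea concrete, here is a minimal userspace sketch of the slot-selection scheme the patch introduces. This is my illustration, not code from the commit: apart from KFREE_N_BATCHES, every name (batch_slot, batcher, queue_batch) is invented, and the grace period is reduced to a stub. A pending list is promoted into the first idle batch slot, and only when every slot is in flight does the caller have to back off and retry.

#include <stdio.h>

#define KFREE_N_BATCHES 2

struct batch_slot {
	void *head_free;		/* non-NULL while this batch is in flight */
};

struct batcher {
	void *head;			/* objects not yet waiting for a grace period */
	struct batch_slot slots[KFREE_N_BATCHES];
};

/* Promote @b->head into the first idle slot; return 0 if all are busy. */
static int queue_batch(struct batcher *b)
{
	int i;

	for (i = 0; i < KFREE_N_BATCHES; i++) {
		if (!b->slots[i].head_free) {
			b->slots[i].head_free = b->head;
			b->head = NULL;
			return 1;	/* slot i now "waits" for its grace period */
		}
	}
	return 0;			/* all batches in flight: caller retries later */
}

int main(void)
{
	int dummy;
	struct batcher b = { .head = &dummy };

	printf("first  queue: %d\n", queue_batch(&b));	/* 1: slot 0 taken */
	b.head = &dummy;
	printf("second queue: %d\n", queue_batch(&b));	/* 1: slot 1 taken */
	b.head = &dummy;
	printf("third  queue: %d\n", queue_batch(&b));	/* 0: both in flight */
	return 0;
}

With a single slot (the old head_free pointer), the second promotion would already have to wait; the extra slot lets a second batch start waiting for its own grace period while the first is still draining, which is where the reported 100-150MB saving comes from.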

File tree

1 file changed: +39, -12 lines


kernel/rcu/tree.c

Lines changed: 39 additions & 12 deletions
@@ -2686,12 +2686,25 @@ EXPORT_SYMBOL_GPL(call_rcu);
 
 /* Maximum number of jiffies to wait before draining a batch. */
 #define KFREE_DRAIN_JIFFIES (HZ / 50)
+#define KFREE_N_BATCHES 2
 
 /**
- * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
+ * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
  * @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
+ * @head_free: List of kfree_rcu() objects waiting for a grace period
+ * @krcp: Pointer to @kfree_rcu_cpu structure
+ */
+
+struct kfree_rcu_cpu_work {
+	struct rcu_work rcu_work;
+	struct rcu_head *head_free;
+	struct kfree_rcu_cpu *krcp;
+};
+
+/**
+ * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
  * @head: List of kfree_rcu() objects not yet waiting for a grace period
- * @head_free: List of kfree_rcu() objects already waiting for a grace period
+ * @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
  * @lock: Synchronize access to this structure
  * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
  * @monitor_todo: Tracks whether a @monitor_work delayed work is pending
@@ -2703,9 +2716,8 @@ EXPORT_SYMBOL_GPL(call_rcu);
  * the interactions with the slab allocators.
  */
 struct kfree_rcu_cpu {
-	struct rcu_work rcu_work;
 	struct rcu_head *head;
-	struct rcu_head *head_free;
+	struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
 	spinlock_t lock;
 	struct delayed_work monitor_work;
 	bool monitor_todo;
@@ -2723,11 +2735,14 @@ static void kfree_rcu_work(struct work_struct *work)
 	unsigned long flags;
 	struct rcu_head *head, *next;
 	struct kfree_rcu_cpu *krcp;
+	struct kfree_rcu_cpu_work *krwp;
 
-	krcp = container_of(to_rcu_work(work), struct kfree_rcu_cpu, rcu_work);
+	krwp = container_of(to_rcu_work(work),
+			    struct kfree_rcu_cpu_work, rcu_work);
+	krcp = krwp->krcp;
 	spin_lock_irqsave(&krcp->lock, flags);
-	head = krcp->head_free;
-	krcp->head_free = NULL;
+	head = krwp->head_free;
+	krwp->head_free = NULL;
 	spin_unlock_irqrestore(&krcp->lock, flags);
 
 	// List "head" is now private, so traverse locklessly.
@@ -2747,17 +2762,25 @@ static void kfree_rcu_work(struct work_struct *work)
  */
 static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
 {
+	int i;
+	struct kfree_rcu_cpu_work *krwp = NULL;
+
 	lockdep_assert_held(&krcp->lock);
+	for (i = 0; i < KFREE_N_BATCHES; i++)
+		if (!krcp->krw_arr[i].head_free) {
+			krwp = &(krcp->krw_arr[i]);
+			break;
+		}
 
 	// If a previous RCU batch is in progress, we cannot immediately
 	// queue another one, so return false to tell caller to retry.
-	if (krcp->head_free)
+	if (!krwp)
 		return false;
 
-	krcp->head_free = krcp->head;
+	krwp->head_free = krcp->head;
 	krcp->head = NULL;
-	INIT_RCU_WORK(&krcp->rcu_work, kfree_rcu_work);
-	queue_rcu_work(system_wq, &krcp->rcu_work);
+	INIT_RCU_WORK(&krwp->rcu_work, kfree_rcu_work);
+	queue_rcu_work(system_wq, &krwp->rcu_work);
 	return true;
 }
 
@@ -2863,7 +2886,8 @@ void __init kfree_rcu_scheduler_running(void)
 			continue;
 		}
 		krcp->monitor_todo = true;
-		schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+		schedule_delayed_work_on(cpu, &krcp->monitor_work,
+					 KFREE_DRAIN_JIFFIES);
 		spin_unlock_irqrestore(&krcp->lock, flags);
 	}
 }
@@ -3732,11 +3756,14 @@ struct workqueue_struct *rcu_par_gp_wq;
 static void __init kfree_rcu_batch_init(void)
 {
 	int cpu;
+	int i;
 
 	for_each_possible_cpu(cpu) {
 		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
 
 		spin_lock_init(&krcp->lock);
+		for (i = 0; i < KFREE_N_BATCHES; i++)
+			krcp->krw_arr[i].krcp = krcp;
 		INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
 		krcp->initialized = true;
 	}
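
For context, this is roughly what a kfree_rcu() call site looks like. An illustrative sketch, not part of this commit: struct foo and release_foo() are invented names; only kfree_rcu() and struct rcu_head are real. Each such call links the object's rcu_head onto the per-CPU @head list, which the code above later promotes into a free krw_arr[] slot.

struct foo {
	int data;
	struct rcu_head rcu;	/* linked onto krcp->head by kfree_rcu() */
};

static void release_foo(struct foo *fp)
{
	/* Defer kfree(fp) until a grace period has elapsed; the object
	 * waits on the per-CPU @head list until the monitor work
	 * promotes that list into an idle batch slot. */
	kfree_rcu(fp, rcu);
}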
