Skip to content

Commit 2ca836b

Browse files
urezki and paulmckrcu
authored and committed
rcu/kvfree: Split ready for reclaim objects from a batch
This patch splits the lists of objects so as to avoid sending any through RCU that have already been queued for more than one grace period. These long-term-resident objects are immediately freed. The remaining short-term-resident objects are queued for later freeing using queue_rcu_work(). This change avoids delaying workqueue handlers with synchronize_rcu() invocations. Yes, workqueue handlers are designed to handle blocking, but avoiding blocking when unnecessary improves performance during low-memory situations. Signed-off-by: Uladzislau Rezki (Sony) <[email protected]> Signed-off-by: Paul E. McKenney <[email protected]>
1 parent 4c33464 commit 2ca836b

File tree

1 file changed

+54
-33
lines changed

1 file changed

+54
-33
lines changed

kernel/rcu/tree.c

Lines changed: 54 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2900,22 +2900,21 @@ struct kvfree_rcu_bulk_data {
29002900
* struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
29012901
* @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
29022902
* @head_free: List of kfree_rcu() objects waiting for a grace period
2903-
* @head_free_gp_snap: Snapshot of RCU state for objects placed to "@head_free"
29042903
* @bulk_head_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
29052904
* @krcp: Pointer to @kfree_rcu_cpu structure
29062905
*/
29072906

29082907
struct kfree_rcu_cpu_work {
2909-
struct work_struct rcu_work;
2908+
struct rcu_work rcu_work;
29102909
struct rcu_head *head_free;
2911-
unsigned long head_free_gp_snap;
29122910
struct list_head bulk_head_free[FREE_N_CHANNELS];
29132911
struct kfree_rcu_cpu *krcp;
29142912
};
29152913

29162914
/**
29172915
* struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
29182916
* @head: List of kfree_rcu() objects not yet waiting for a grace period
2917+
* @head_gp_snap: Snapshot of RCU state for objects placed to "@head"
29192918
* @bulk_head: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period
29202919
* @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
29212920
* @lock: Synchronize access to this structure
@@ -2943,6 +2942,7 @@ struct kfree_rcu_cpu {
29432942
// Objects queued on a linked list
29442943
// through their rcu_head structures.
29452944
struct rcu_head *head;
2945+
unsigned long head_gp_snap;
29462946
atomic_t head_count;
29472947

29482948
// Objects queued on a bulk-list.
@@ -3111,10 +3111,9 @@ static void kfree_rcu_work(struct work_struct *work)
31113111
struct rcu_head *head;
31123112
struct kfree_rcu_cpu *krcp;
31133113
struct kfree_rcu_cpu_work *krwp;
3114-
unsigned long head_free_gp_snap;
31153114
int i;
31163115

3117-
krwp = container_of(work,
3116+
krwp = container_of(to_rcu_work(work),
31183117
struct kfree_rcu_cpu_work, rcu_work);
31193118
krcp = krwp->krcp;
31203119

@@ -3126,26 +3125,11 @@ static void kfree_rcu_work(struct work_struct *work)
31263125
// Channel 3.
31273126
head = krwp->head_free;
31283127
krwp->head_free = NULL;
3129-
head_free_gp_snap = krwp->head_free_gp_snap;
31303128
raw_spin_unlock_irqrestore(&krcp->lock, flags);
31313129

31323130
// Handle the first two channels.
31333131
for (i = 0; i < FREE_N_CHANNELS; i++) {
31343132
// Start from the tail page, so a GP is likely passed for it.
3135-
list_for_each_entry_safe_reverse(bnode, n, &bulk_head[i], list) {
3136-
// Not yet ready? Bail out since we need one more GP.
3137-
if (!poll_state_synchronize_rcu(bnode->gp_snap))
3138-
break;
3139-
3140-
list_del_init(&bnode->list);
3141-
kvfree_rcu_bulk(krcp, bnode, i);
3142-
}
3143-
3144-
// Please note a request for one more extra GP can
3145-
// occur only once for all objects in this batch.
3146-
if (!list_empty(&bulk_head[i]))
3147-
synchronize_rcu();
3148-
31493133
list_for_each_entry_safe(bnode, n, &bulk_head[i], list)
31503134
kvfree_rcu_bulk(krcp, bnode, i);
31513135
}
@@ -3157,10 +3141,7 @@ static void kfree_rcu_work(struct work_struct *work)
31573141
* queued on a linked list through their rcu_head structures.
31583142
* This list is named "Channel 3".
31593143
*/
3160-
if (head) {
3161-
cond_synchronize_rcu(head_free_gp_snap);
3162-
kvfree_rcu_list(head);
3163-
}
3144+
kvfree_rcu_list(head);
31643145
}
31653146

31663147
static bool
@@ -3201,6 +3182,44 @@ schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
32013182
queue_delayed_work(system_wq, &krcp->monitor_work, delay);
32023183
}
32033184

3185+
static void
3186+
kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
3187+
{
3188+
struct list_head bulk_ready[FREE_N_CHANNELS];
3189+
struct kvfree_rcu_bulk_data *bnode, *n;
3190+
struct rcu_head *head_ready = NULL;
3191+
unsigned long flags;
3192+
int i;
3193+
3194+
raw_spin_lock_irqsave(&krcp->lock, flags);
3195+
for (i = 0; i < FREE_N_CHANNELS; i++) {
3196+
INIT_LIST_HEAD(&bulk_ready[i]);
3197+
3198+
list_for_each_entry_safe_reverse(bnode, n, &krcp->bulk_head[i], list) {
3199+
if (!poll_state_synchronize_rcu(bnode->gp_snap))
3200+
break;
3201+
3202+
atomic_sub(bnode->nr_records, &krcp->bulk_count[i]);
3203+
list_move(&bnode->list, &bulk_ready[i]);
3204+
}
3205+
}
3206+
3207+
if (krcp->head && poll_state_synchronize_rcu(krcp->head_gp_snap)) {
3208+
head_ready = krcp->head;
3209+
atomic_set(&krcp->head_count, 0);
3210+
WRITE_ONCE(krcp->head, NULL);
3211+
}
3212+
raw_spin_unlock_irqrestore(&krcp->lock, flags);
3213+
3214+
for (i = 0; i < FREE_N_CHANNELS; i++) {
3215+
list_for_each_entry_safe(bnode, n, &bulk_ready[i], list)
3216+
kvfree_rcu_bulk(krcp, bnode, i);
3217+
}
3218+
3219+
if (head_ready)
3220+
kvfree_rcu_list(head_ready);
3221+
}
3222+
32043223
/*
32053224
* This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
32063225
*/
@@ -3211,6 +3230,9 @@ static void kfree_rcu_monitor(struct work_struct *work)
32113230
unsigned long flags;
32123231
int i, j;
32133232

3233+
// Drain ready for reclaim.
3234+
kvfree_rcu_drain_ready(krcp);
3235+
32143236
raw_spin_lock_irqsave(&krcp->lock, flags);
32153237

32163238
// Attempt to start a new batch.
@@ -3230,30 +3252,26 @@ static void kfree_rcu_monitor(struct work_struct *work)
32303252
// Channel 2 corresponds to vmalloc-pointer bulk path.
32313253
for (j = 0; j < FREE_N_CHANNELS; j++) {
32323254
if (list_empty(&krwp->bulk_head_free[j])) {
3233-
list_replace_init(&krcp->bulk_head[j], &krwp->bulk_head_free[j]);
32343255
atomic_set(&krcp->bulk_count[j], 0);
3256+
list_replace_init(&krcp->bulk_head[j],
3257+
&krwp->bulk_head_free[j]);
32353258
}
32363259
}
32373260

32383261
// Channel 3 corresponds to both SLAB and vmalloc
32393262
// objects queued on the linked list.
32403263
if (!krwp->head_free) {
32413264
krwp->head_free = krcp->head;
3242-
WRITE_ONCE(krcp->head, NULL);
32433265
atomic_set(&krcp->head_count, 0);
3244-
3245-
// Take a snapshot for this krwp. Please note no more
3246-
// any objects can be added to attached head_free channel
3247-
// therefore fixate a GP for it here.
3248-
krwp->head_free_gp_snap = get_state_synchronize_rcu();
3266+
WRITE_ONCE(krcp->head, NULL);
32493267
}
32503268

32513269
// One work is per one batch, so there are three
32523270
// "free channels", the batch can handle. It can
32533271
// be that the work is in the pending state when
32543272
// channels have been detached following by each
32553273
// other.
3256-
queue_work(system_wq, &krwp->rcu_work);
3274+
queue_rcu_work(system_wq, &krwp->rcu_work);
32573275
}
32583276
}
32593277

@@ -3440,6 +3458,9 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
34403458
head->next = krcp->head;
34413459
WRITE_ONCE(krcp->head, head);
34423460
atomic_inc(&krcp->head_count);
3461+
3462+
// Take a snapshot for this krcp.
3463+
krcp->head_gp_snap = get_state_synchronize_rcu();
34433464
success = true;
34443465
}
34453466

@@ -4834,7 +4855,7 @@ static void __init kfree_rcu_batch_init(void)
48344855
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
48354856

48364857
for (i = 0; i < KFREE_N_BATCHES; i++) {
4837-
INIT_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
4858+
INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
48384859
krcp->krw_arr[i].krcp = krcp;
48394860

48404861
for (j = 0; j < FREE_N_CHANNELS; j++)

0 commit comments

Comments
 (0)