Commit 4c33464

urezki authored and paulmckrcu committed
rcu/kvfree: Carefully reset number of objects in krcp
The schedule_delayed_monitor_work() function relies on the count of objects queued into any given kfree_rcu_cpu structure. This count is used to determine how quickly to schedule passing these objects to RCU. There are three pipes where pointers can be placed. When any pipe is offloaded, the kfree_rcu_cpu structure's ->count counter is set to zero, which is wrong because the other pipes might still be non-empty. This commit therefore maintains per-pipe counters, and introduces a krc_count() helper to access the aggregate value of those counters.

Signed-off-by: Uladzislau Rezki (Sony) <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
1 parent 9627456 commit 4c33464
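For readers outside the kernel tree, here is a minimal userspace sketch of the counting scheme the commit message describes. It is illustrative only: C11 atomics stand in for the kernel's atomic_t API, the struct and helper names merely mirror the patch below, and the two bulk channels follow the SLAB/vmalloc split noted in the diff's comments.

/*
 * Illustrative userspace model of per-pipe object counting.
 * Not kernel code: C11 atomics replace the kernel atomic_t API,
 * and all names here are hypothetical stand-ins for the patch's.
 */
#include <stdatomic.h>
#include <stdio.h>

#define N_BULK_CHANNELS 2 /* SLAB and vmalloc bulk pipes */

struct pipe_counts {
        atomic_int head_count;                  /* rcu_head linked-list pipe */
        atomic_int bulk_count[N_BULK_CHANNELS]; /* bulk-array pipes */
};

/* Aggregate across all pipes, mirroring krc_count() in the patch. */
static int total_count(struct pipe_counts *pc)
{
        int i, sum = atomic_load(&pc->head_count);

        for (i = 0; i < N_BULK_CHANNELS; i++)
                sum += atomic_load(&pc->bulk_count[i]);
        return sum;
}

int main(void)
{
        struct pipe_counts pc = { 0 };

        atomic_fetch_add(&pc.head_count, 3);    /* 3 objects on the list pipe */
        atomic_fetch_add(&pc.bulk_count[0], 5); /* 5 objects on one bulk pipe */

        /* Offload only the bulk pipe: reset just that pipe's counter. */
        atomic_store(&pc.bulk_count[0], 0);

        /*
         * The list pipe's objects remain counted: prints "remaining: 3".
         * A single shared counter reset to zero here, like the old
         * ->count, would have lost them.
         */
        printf("remaining: %d\n", total_count(&pc));
        return 0;
}

The design point is that each drain path clears only the counter for the pipe it actually emptied, so the scheduling heuristic in schedule_delayed_monitor_work() never sees zero while another pipe still holds objects.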

File tree

1 file changed: +30 −10 lines changed


kernel/rcu/tree.c

Lines changed: 30 additions & 10 deletions
@@ -2921,7 +2921,8 @@ struct kfree_rcu_cpu_work {
  * @lock: Synchronize access to this structure
  * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
  * @initialized: The @rcu_work fields have been initialized
- * @count: Number of objects for which GP not started
+ * @head_count: Number of objects in rcu_head singular list
+ * @bulk_count: Number of objects in bulk-list
  * @bkvcache:
  *	A simple cache list that contains objects for reuse purpose.
  *	In order to save some per-cpu space the list is singular.
@@ -2939,13 +2940,19 @@ struct kfree_rcu_cpu_work {
  *	the interactions with the slab allocators.
  */
 struct kfree_rcu_cpu {
+	// Objects queued on a linked list
+	// through their rcu_head structures.
 	struct rcu_head *head;
+	atomic_t head_count;
+
+	// Objects queued on a bulk-list.
 	struct list_head bulk_head[FREE_N_CHANNELS];
+	atomic_t bulk_count[FREE_N_CHANNELS];
+
 	struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
 	raw_spinlock_t lock;
 	struct delayed_work monitor_work;
 	bool initialized;
-	int count;
 
 	struct delayed_work page_cache_work;
 	atomic_t backoff_page_cache_fill;
@@ -3168,12 +3175,23 @@ need_offload_krc(struct kfree_rcu_cpu *krcp)
 	return !!READ_ONCE(krcp->head);
 }
 
+static int krc_count(struct kfree_rcu_cpu *krcp)
+{
+	int sum = atomic_read(&krcp->head_count);
+	int i;
+
+	for (i = 0; i < FREE_N_CHANNELS; i++)
+		sum += atomic_read(&krcp->bulk_count[i]);
+
+	return sum;
+}
+
 static void
 schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
 {
 	long delay, delay_left;
 
-	delay = READ_ONCE(krcp->count) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES;
+	delay = krc_count(krcp) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES;
 	if (delayed_work_pending(&krcp->monitor_work)) {
 		delay_left = krcp->monitor_work.timer.expires - jiffies;
 		if (delay < delay_left)
@@ -3211,24 +3229,25 @@ static void kfree_rcu_monitor(struct work_struct *work)
 		// Channel 1 corresponds to the SLAB-pointer bulk path.
 		// Channel 2 corresponds to vmalloc-pointer bulk path.
 		for (j = 0; j < FREE_N_CHANNELS; j++) {
-			if (list_empty(&krwp->bulk_head_free[j]))
+			if (list_empty(&krwp->bulk_head_free[j])) {
 				list_replace_init(&krcp->bulk_head[j], &krwp->bulk_head_free[j]);
+				atomic_set(&krcp->bulk_count[j], 0);
+			}
 		}
 
 		// Channel 3 corresponds to both SLAB and vmalloc
 		// objects queued on the linked list.
 		if (!krwp->head_free) {
 			krwp->head_free = krcp->head;
 			WRITE_ONCE(krcp->head, NULL);
+			atomic_set(&krcp->head_count, 0);
 
 			// Take a snapshot for this krwp. Please note no more
 			// any objects can be added to attached head_free channel
 			// therefore fixate a GP for it here.
 			krwp->head_free_gp_snap = get_state_synchronize_rcu();
 		}
 
-		WRITE_ONCE(krcp->count, 0);
-
 		// One work is per one batch, so there are three
 		// "free channels", the batch can handle. It can
 		// be that the work is in the pending state when
@@ -3365,6 +3384,8 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
 	// Finally insert and update the GP for this page.
 	bnode->records[bnode->nr_records++] = ptr;
 	bnode->gp_snap = get_state_synchronize_rcu();
+	atomic_inc(&(*krcp)->bulk_count[idx]);
+
 	return true;
 }
 
@@ -3418,11 +3439,10 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
 		head->func = ptr;
 		head->next = krcp->head;
 		WRITE_ONCE(krcp->head, head);
+		atomic_inc(&krcp->head_count);
 		success = true;
 	}
 
-	WRITE_ONCE(krcp->count, krcp->count + 1);
-
 	// Set timer to drain after KFREE_DRAIN_JIFFIES.
 	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING)
 		schedule_delayed_monitor_work(krcp);
@@ -3453,7 +3473,7 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 	for_each_possible_cpu(cpu) {
 		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
 
-		count += READ_ONCE(krcp->count);
+		count += krc_count(krcp);
 		count += READ_ONCE(krcp->nr_bkv_objs);
 		atomic_set(&krcp->backoff_page_cache_fill, 1);
 	}
@@ -3470,7 +3490,7 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 	int count;
 	struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
 
-	count = krcp->count;
+	count = krc_count(krcp);
 	count += drain_page_cache(krcp);
 	kfree_rcu_monitor(&krcp->monitor_work.work);
 