 #include <linux/slab.h>
 #include <linux/sched/isolation.h>
 #include <linux/sched/clock.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include "../time/tick-internal.h"
 
 #include "tree.h"
@@ -2966,46 +2968,47 @@ EXPORT_SYMBOL_GPL(call_rcu);
 /* Maximum number of jiffies to wait before draining a batch. */
 #define KFREE_DRAIN_JIFFIES (HZ / 50)
 #define KFREE_N_BATCHES 2
+#define FREE_N_CHANNELS 2
 
 /**
- * struct kfree_rcu_bulk_data - single block to store kfree_rcu() pointers
+ * struct kvfree_rcu_bulk_data - single block to store kvfree_rcu() pointers
  * @nr_records: Number of active pointers in the array
- * @records: Array of the kfree_rcu() pointers
  * @next: Next bulk object in the block chain
+ * @records: Array of the kvfree_rcu() pointers
  */
-struct kfree_rcu_bulk_data {
+struct kvfree_rcu_bulk_data {
 	unsigned long nr_records;
-	struct kfree_rcu_bulk_data *next;
+	struct kvfree_rcu_bulk_data *next;
 	void *records[];
 };
 
 /*
  * This macro defines how many entries the "records" array
  * will contain. It is based on the fact that the size of
- * kfree_rcu_bulk_data structure becomes exactly one page.
+ * kvfree_rcu_bulk_data structure becomes exactly one page.
  */
-#define KFREE_BULK_MAX_ENTR \
-	((PAGE_SIZE - sizeof(struct kfree_rcu_bulk_data)) / sizeof(void *))
+#define KVFREE_BULK_MAX_ENTR \
+	((PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *))
 
 /**
  * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
  * @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
  * @head_free: List of kfree_rcu() objects waiting for a grace period
- * @bhead_free: Bulk-List of kfree_rcu() objects waiting for a grace period
+ * @bkvhead_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
  * @krcp: Pointer to @kfree_rcu_cpu structure
  */
 
 struct kfree_rcu_cpu_work {
 	struct rcu_work rcu_work;
 	struct rcu_head *head_free;
-	struct kfree_rcu_bulk_data *bhead_free;
+	struct kvfree_rcu_bulk_data *bkvhead_free[FREE_N_CHANNELS];
 	struct kfree_rcu_cpu *krcp;
 };
 
 /**
  * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
  * @head: List of kfree_rcu() objects not yet waiting for a grace period
- * @bhead: Bulk-List of kfree_rcu() objects not yet waiting for a grace period
+ * @bkvhead: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period
  * @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
  * @lock: Synchronize access to this structure
  * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
@@ -3020,7 +3023,7 @@ struct kfree_rcu_cpu_work {
  */
 struct kfree_rcu_cpu {
 	struct rcu_head *head;
-	struct kfree_rcu_bulk_data *bhead;
+	struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS];
 	struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
 	raw_spinlock_t lock;
 	struct delayed_work monitor_work;
@@ -3044,7 +3047,7 @@ static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc) = {
 };
 
 static __always_inline void
-debug_rcu_bhead_unqueue(struct kfree_rcu_bulk_data *bhead)
+debug_rcu_bhead_unqueue(struct kvfree_rcu_bulk_data *bhead)
 {
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
 	int i;
@@ -3073,20 +3076,20 @@ krc_this_cpu_unlock(struct kfree_rcu_cpu *krcp, unsigned long flags)
 	local_irq_restore(flags);
 }
 
-static inline struct kfree_rcu_bulk_data *
+static inline struct kvfree_rcu_bulk_data *
 get_cached_bnode(struct kfree_rcu_cpu *krcp)
 {
 	if (!krcp->nr_bkv_objs)
 		return NULL;
 
 	krcp->nr_bkv_objs--;
-	return (struct kfree_rcu_bulk_data *)
+	return (struct kvfree_rcu_bulk_data *)
 		llist_del_first(&krcp->bkvcache);
 }
 
 static inline bool
 put_cached_bnode(struct kfree_rcu_cpu *krcp,
-	struct kfree_rcu_bulk_data *bnode)
+	struct kvfree_rcu_bulk_data *bnode)
 {
 	// Check the limit.
 	if (krcp->nr_bkv_objs >= rcu_min_cached_objs)
@@ -3105,43 +3108,63 @@ put_cached_bnode(struct kfree_rcu_cpu *krcp,
 static void kfree_rcu_work(struct work_struct *work)
 {
 	unsigned long flags;
+	struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS], *bnext;
 	struct rcu_head *head, *next;
-	struct kfree_rcu_bulk_data *bhead, *bnext;
 	struct kfree_rcu_cpu *krcp;
 	struct kfree_rcu_cpu_work *krwp;
+	int i, j;
 
 	krwp = container_of(to_rcu_work(work),
 			    struct kfree_rcu_cpu_work, rcu_work);
 	krcp = krwp->krcp;
+
 	raw_spin_lock_irqsave(&krcp->lock, flags);
+	// Channels 1 and 2.
+	for (i = 0; i < FREE_N_CHANNELS; i++) {
+		bkvhead[i] = krwp->bkvhead_free[i];
+		krwp->bkvhead_free[i] = NULL;
+	}
+
+	// Channel 3.
 	head = krwp->head_free;
 	krwp->head_free = NULL;
-	bhead = krwp->bhead_free;
-	krwp->bhead_free = NULL;
 	raw_spin_unlock_irqrestore(&krcp->lock, flags);
 
-	/* "bhead" is now private, so traverse locklessly. */
-	for (; bhead; bhead = bnext) {
-		bnext = bhead->next;
-
-		debug_rcu_bhead_unqueue(bhead);
-
-		rcu_lock_acquire(&rcu_callback_map);
-		trace_rcu_invoke_kfree_bulk_callback(rcu_state.name,
-			bhead->nr_records, bhead->records);
-
-		kfree_bulk(bhead->nr_records, bhead->records);
-		rcu_lock_release(&rcu_callback_map);
+	// Handle two first channels.
+	for (i = 0; i < FREE_N_CHANNELS; i++) {
+		for (; bkvhead[i]; bkvhead[i] = bnext) {
+			bnext = bkvhead[i]->next;
+			debug_rcu_bhead_unqueue(bkvhead[i]);
+
+			rcu_lock_acquire(&rcu_callback_map);
+			if (i == 0) { // kmalloc() / kfree().
+				trace_rcu_invoke_kfree_bulk_callback(
+					rcu_state.name, bkvhead[i]->nr_records,
+					bkvhead[i]->records);
+
+				kfree_bulk(bkvhead[i]->nr_records,
+					bkvhead[i]->records);
+			} else { // vmalloc() / vfree().
+				for (j = 0; j < bkvhead[i]->nr_records; j++) {
+					trace_rcu_invoke_kfree_callback(
+						rcu_state.name,
+						bkvhead[i]->records[j], 0);
+
+					vfree(bkvhead[i]->records[j]);
+				}
+			}
+			rcu_lock_release(&rcu_callback_map);
 
-		krcp = krc_this_cpu_lock(&flags);
-		if (put_cached_bnode(krcp, bhead))
-			bhead = NULL;
-		krc_this_cpu_unlock(krcp, flags);
+			krcp = krc_this_cpu_lock(&flags);
+			if (put_cached_bnode(krcp, bkvhead[i]))
+				bkvhead[i] = NULL;
+			krc_this_cpu_unlock(krcp, flags);
 
-		if (bhead)
-			free_page((unsigned long) bhead);
+			if (bkvhead[i])
+				free_page((unsigned long) bkvhead[i]);
 
-		cond_resched_tasks_rcu_qs();
+			cond_resched_tasks_rcu_qs();
+		}
 	}
 
 	/*
@@ -3159,7 +3182,7 @@ static void kfree_rcu_work(struct work_struct *work)
 		trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);
 
 		if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset)))
-			kfree(ptr);
+			kvfree(ptr);
 
 		rcu_lock_release(&rcu_callback_map);
 		cond_resched_tasks_rcu_qs();
@@ -3176,29 +3199,33 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
 {
 	struct kfree_rcu_cpu_work *krwp;
 	bool repeat = false;
-	int i;
+	int i, j;
 
 	lockdep_assert_held(&krcp->lock);
 
 	for (i = 0; i < KFREE_N_BATCHES; i++) {
 		krwp = &(krcp->krw_arr[i]);
 
 		/*
-		 * Try to detach bhead or head and attach it over any
+		 * Try to detach bkvhead or head and attach it over any
 		 * available corresponding free channel. It can be that
 		 * a previous RCU batch is in progress, it means that
 		 * immediately to queue another one is not possible so
 		 * return false to tell caller to retry.
 		 */
-		if ((krcp->bhead && !krwp->bhead_free) ||
+		if ((krcp->bkvhead[0] && !krwp->bkvhead_free[0]) ||
+			(krcp->bkvhead[1] && !krwp->bkvhead_free[1]) ||
 				(krcp->head && !krwp->head_free)) {
-			/* Channel 1. */
-			if (!krwp->bhead_free) {
-				krwp->bhead_free = krcp->bhead;
-				krcp->bhead = NULL;
+			// Channel 1 corresponds to SLAB ptrs.
+			// Channel 2 corresponds to vmalloc ptrs.
+			for (j = 0; j < FREE_N_CHANNELS; j++) {
+				if (!krwp->bkvhead_free[j]) {
+					krwp->bkvhead_free[j] = krcp->bkvhead[j];
+					krcp->bkvhead[j] = NULL;
+				}
 			}
 
-			/* Channel 2. */
+			// Channel 3 corresponds to emergency path.
 			if (!krwp->head_free) {
 				krwp->head_free = krcp->head;
 				krcp->head = NULL;
@@ -3207,16 +3234,17 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
 			WRITE_ONCE(krcp->count, 0);
 
 			/*
-			 * One work is per one batch, so there are two "free channels",
-			 * "bhead_free" and "head_free" the batch can handle. It can be
-			 * that the work is in the pending state when two channels have
-			 * been detached following each other, one by one.
+			 * One work is per one batch, so there are three
+			 * "free channels", the batch can handle. It can
+			 * be that the work is in the pending state when
+			 * channels have been detached following by each
+			 * other.
 			 */
 			queue_rcu_work(system_wq, &krwp->rcu_work);
 		}
 
-		/* Repeat if any "free" corresponding channel is still busy. */
-		if (krcp->bhead || krcp->head)
+		// Repeat if any "free" corresponding channel is still busy.
+		if (krcp->bkvhead[0] || krcp->bkvhead[1] || krcp->head)
 			repeat = true;
 	}
 
@@ -3258,23 +3286,22 @@ static void kfree_rcu_monitor(struct work_struct *work)
 }
 
 static inline bool
-kfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp,
-	struct rcu_head *head, rcu_callback_t func)
+kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
 {
-	struct kfree_rcu_bulk_data *bnode;
+	struct kvfree_rcu_bulk_data *bnode;
+	int idx;
 
 	if (unlikely(!krcp->initialized))
 		return false;
 
 	lockdep_assert_held(&krcp->lock);
+	idx = !!is_vmalloc_addr(ptr);
 
 	/* Check if a new block is required. */
-	if (!krcp->bhead ||
-			krcp->bhead->nr_records == KFREE_BULK_MAX_ENTR) {
+	if (!krcp->bkvhead[idx] ||
+			krcp->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
 		bnode = get_cached_bnode(krcp);
 		if (!bnode) {
-			WARN_ON_ONCE(sizeof(struct kfree_rcu_bulk_data) > PAGE_SIZE);
-
 			/*
 			 * To keep this path working on raw non-preemptible
 			 * sections, prevent the optional entry into the
@@ -3287,7 +3314,7 @@ kfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp,
 			if (IS_ENABLED(CONFIG_PREEMPT_RT))
 				return false;
 
-			bnode = (struct kfree_rcu_bulk_data *)
+			bnode = (struct kvfree_rcu_bulk_data *)
 				__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
 		}
 
@@ -3297,30 +3324,30 @@ kfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp,
 
 		/* Initialize the new block. */
 		bnode->nr_records = 0;
-		bnode->next = krcp->bhead;
+		bnode->next = krcp->bkvhead[idx];
 
 		/* Attach it to the head. */
-		krcp->bhead = bnode;
+		krcp->bkvhead[idx] = bnode;
 	}
 
 	/* Finally insert. */
-	krcp->bhead->records[krcp->bhead->nr_records++] =
-		(void *) head - (unsigned long) func;
+	krcp->bkvhead[idx]->records
+		[krcp->bkvhead[idx]->nr_records++] = ptr;
 
 	return true;
 }
 
 /*
- * Queue a request for lazy invocation of kfree_bulk()/kfree() after a grace
- * period. Please note there are two paths are maintained, one is the main one
- * that uses kfree_bulk() interface and second one is emergency one, that is
- * used only when the main path can not be maintained temporary, due to memory
- * pressure.
+ * Queue a request for lazy invocation of appropriate free routine after a
+ * grace period. Please note there are three paths are maintained, two are the
+ * main ones that use array of pointers interface and third one is emergency
+ * one, that is used only when the main path can not be maintained temporary,
+ * due to memory pressure.
  *
  * Each kfree_call_rcu() request is added to a batch. The batch will be drained
  * every KFREE_DRAIN_JIFFIES number of jiffies. All the objects in the batch will
  * be free'd in workqueue context. This allows us to: batch requests together to
- * reduce the number of grace periods during heavy kfree_rcu() load.
+ * reduce the number of grace periods during heavy kfree_rcu()/kvfree_rcu() load.
  */
 void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
@@ -3343,7 +3370,7 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 	 * Under high memory pressure GFP_NOWAIT can fail,
 	 * in that case the emergency path is maintained.
 	 */
-	if (unlikely(!kfree_call_rcu_add_ptr_to_bulk(krcp, head, func))) {
+	if (unlikely(!kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr))) {
 		head->func = func;
 		head->next = krcp->head;
 		krcp->head = head;
@@ -4324,15 +4351,15 @@ static void __init kfree_rcu_batch_init(void)
 
 	for_each_possible_cpu(cpu) {
 		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
-		struct kfree_rcu_bulk_data *bnode;
+		struct kvfree_rcu_bulk_data *bnode;
 
 		for (i = 0; i < KFREE_N_BATCHES; i++) {
 			INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
 			krcp->krw_arr[i].krcp = krcp;
 		}
 
 		for (i = 0; i < rcu_min_cached_objs; i++) {
-			bnode = (struct kfree_rcu_bulk_data *)
+			bnode = (struct kvfree_rcu_bulk_data *)
 				__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
 
 			if (bnode)
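
The sizing macro in the first hunk, KVFREE_BULK_MAX_ENTR, picks the number of records so that the block header plus its records[] flexible array fill exactly one page, which is why a spent block can be handed back with free_page(). Below is a minimal userspace sketch of that arithmetic; PAGE_SIZE, struct bulk_data and BULK_MAX_ENTR are local stand-ins (assuming a 4 KiB page and 64-bit pointers), not the kernel definitions.

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* assumed page size, not the kernel macro */

/* Mirrors the shape of kvfree_rcu_bulk_data: a small header followed by
 * a flexible array of pointers, sized so the whole block is one page. */
struct bulk_data {
	unsigned long nr_records;
	struct bulk_data *next;
	void *records[];
};

#define BULK_MAX_ENTR \
	((PAGE_SIZE - sizeof(struct bulk_data)) / sizeof(void *))

int main(void)
{
	/* With a 16-byte header and 8-byte pointers: (4096 - 16) / 8 = 510. */
	printf("pointers per one-page block: %zu\n", (size_t)BULK_MAX_ENTR);
	return 0;
}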
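The reworked kfree_rcu_work() above drains two bulk channels per batch: channel 0 holds slab pointers and is released with a single kfree_bulk() call, channel 1 holds vmalloc pointers and is walked with vfree() one record at a time, and the queueing side selects the channel with idx = !!is_vmalloc_addr(ptr). The sketch below models only that dispatch in plain userspace C; the fake_* helpers and the fixed-size arrays are stand-ins for the kernel's address-range check, bulk free and per-CPU block chains, not real kernel APIs.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define FREE_N_CHANNELS 2	/* 0: kmalloc()/kfree() style, 1: vmalloc()/vfree() style */

/* Stand-in for is_vmalloc_addr(); in the kernel the address range decides. */
static bool fake_is_vmalloc_addr(const void *ptr)
{
	(void)ptr;
	return false;	/* everything in this sketch comes from malloc() */
}

/* Stand-in for kfree_bulk(): free a whole array of pointers in one call. */
static void fake_kfree_bulk(size_t nr, void **ptrs)
{
	for (size_t i = 0; i < nr; i++)
		free(ptrs[i]);
	printf("bulk-freed %zu slab-style pointers\n", nr);
}

/* Stand-in for vfree(): free a single vmalloc-style pointer. */
static void fake_vfree(void *ptr)
{
	free(ptr);
}

int main(void)
{
	void *records[FREE_N_CHANNELS][4];
	size_t nr[FREE_N_CHANNELS] = { 0, 0 };

	/* Queueing side: pick the channel by inspecting the pointer,
	 * as idx = !!is_vmalloc_addr(ptr) does in the patch. */
	for (int i = 0; i < 4; i++) {
		void *p = malloc(32);
		int idx = !!fake_is_vmalloc_addr(p);
		records[idx][nr[idx]++] = p;
	}

	/* Draining side: channel 0 goes through the bulk interface,
	 * channel 1 is walked record by record. */
	for (int i = 0; i < FREE_N_CHANNELS; i++) {
		if (i == 0)
			fake_kfree_bulk(nr[i], records[i]);
		else
			for (size_t j = 0; j < nr[i]; j++)
				fake_vfree(records[i][j]);
	}
	return 0;
}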