
Commit af92793

Alexei Starovoitov authored and Vlastimil Babka (tehcaster) committed
slab: Introduce kmalloc_nolock() and kfree_nolock().
kmalloc_nolock() relies on the ability of local_trylock_t to detect the situation when the per-cpu kmem_cache is locked. In !PREEMPT_RT, local_(try)lock_irqsave(&s->cpu_slab->lock, flags) disables IRQs and marks s->cpu_slab->lock as acquired, and local_lock_is_locked(&s->cpu_slab->lock) returns true when slab is in the middle of manipulating the per-cpu cache of that specific kmem_cache.

kmalloc_nolock() can be called from any context and can re-enter into ___slab_alloc():

  kmalloc() -> ___slab_alloc(cache_A) -> irqsave -> NMI -> bpf ->
    kmalloc_nolock() -> ___slab_alloc(cache_B)

or

  kmalloc() -> ___slab_alloc(cache_A) -> irqsave -> tracepoint/kprobe -> bpf ->
    kmalloc_nolock() -> ___slab_alloc(cache_B)

Hence the caller of ___slab_alloc() checks whether &s->cpu_slab->lock can be acquired without a deadlock before invoking the function. If that specific per-cpu kmem_cache is busy, kmalloc_nolock() retries in a different kmalloc bucket. The second attempt will likely succeed, since this cpu has locked a different kmem_cache.

Similarly, in PREEMPT_RT, local_lock_is_locked() returns true when the per-cpu rt_spin_lock is locked by the current _task_. In this case re-entrance into the same kmalloc bucket is unsafe, so kmalloc_nolock() tries a different bucket that is most likely not locked by the current task. Though it may be locked by a different task, it is safe to rt_spin_lock() and sleep on it. Similar to alloc_pages_nolock(), kmalloc_nolock() returns NULL immediately if called from hard irq or NMI in PREEMPT_RT.

kfree_nolock() defers freeing to irq_work when local_lock_is_locked() and (in_nmi() or in PREEMPT_RT).

The SLUB_TINY config doesn't use local_lock_is_locked() and relies on spin_trylock_irqsave(&n->list_lock) to allocate, while kfree_nolock() always defers to irq_work.

Note, kfree_nolock() must be called _only_ for objects allocated with kmalloc_nolock(). Debug checks (like kmemleak and kfence) were skipped on allocation, hence

  obj = kmalloc(); kfree_nolock(obj);

will miss kmemleak/kfence bookkeeping and will cause false positives. large_kmalloc is not supported by either kmalloc_nolock() or kfree_nolock().

Signed-off-by: Alexei Starovoitov <[email protected]>
Reviewed-by: Harry Yoo <[email protected]>
Signed-off-by: Vlastimil Babka <[email protected]>
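A minimal usage sketch of the new API (not part of the commit; the calling function and struct are hypothetical, and it assumes __GFP_ZERO is among the few gfp modifiers the function accepts):

/*
 * Illustrative caller: an NMI-safe allocation as a bpf program might
 * trigger. kmalloc_nolock() never spins or sleeps, so it can fail where
 * kmalloc(GFP_ATOMIC) would succeed; callers must tolerate NULL (and on
 * PREEMPT_RT it always returns NULL from hard irq/NMI context).
 */
struct foo {
	int val;
};

static void example_from_nmi(void)
{
	struct foo *obj;

	/* arguments: size, gfp flags, NUMA node */
	obj = kmalloc_nolock(sizeof(*obj), __GFP_ZERO, NUMA_NO_NODE);
	if (!obj)
		return;		/* allocation may fail; no retry loop here */

	obj->val = 42;

	/*
	 * Must pair with kmalloc_nolock(): kfree_nolock() skips the
	 * kmemleak/kfence bookkeeping that a kmalloc() allocation
	 * would have registered.
	 */
	kfree_nolock(obj);
}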
1 parent 7612833 commit af92793

File tree

8 files changed (+483, -55 lines)


include/linux/kasan.h

Lines changed: 8 additions & 5 deletions
@@ -200,7 +200,7 @@ static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s,
 }
 
 bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
-		       bool still_accessible);
+		       bool still_accessible, bool no_quarantine);
 /**
  * kasan_slab_free - Poison, initialize, and quarantine a slab object.
  * @object: Object to be freed.
@@ -226,11 +226,13 @@ bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
  * @Return true if KASAN took ownership of the object; false otherwise.
  */
 static __always_inline bool kasan_slab_free(struct kmem_cache *s,
-					    void *object, bool init,
-					    bool still_accessible)
+					    void *object, bool init,
+					    bool still_accessible,
+					    bool no_quarantine)
 {
 	if (kasan_enabled())
-		return __kasan_slab_free(s, object, init, still_accessible);
+		return __kasan_slab_free(s, object, init, still_accessible,
+					 no_quarantine);
 	return false;
 }
 
@@ -427,7 +429,8 @@ static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object)
 }
 
 static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
-				   bool init, bool still_accessible)
+				   bool init, bool still_accessible,
+				   bool no_quarantine)
 {
 	return false;
 }
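The new no_quarantine argument exists because KASAN's quarantine takes ownership of freed objects and releases them later, which is incompatible with a free path that cannot take locks. A sketch of the intended call-site pattern (the wrapper below is hypothetical; the real SLUB caller lives in mm/slub.c, which this page does not show):

/*
 * Hypothetical wrapper, for illustration. With no_quarantine=true,
 * __kasan_slab_free() still poisons the object but returns false,
 * i.e. KASAN does not take ownership, so the slab free proceeds
 * immediately instead of the object being parked in the quarantine.
 */
static __always_inline bool example_kasan_free(struct kmem_cache *s,
					       void *object, bool lockless)
{
	return kasan_slab_free(s, object, /* init */ false,
			       /* still_accessible */ false,
			       /* no_quarantine */ lockless);
}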

include/linux/memcontrol.h

Lines changed: 2 additions & 0 deletions
@@ -358,6 +358,8 @@ enum objext_flags {
 	 * MEMCG_DATA_OBJEXTS.
 	 */
 	OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL,
+	/* slabobj_ext vector allocated with kmalloc_nolock() */
+	OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG,
 	/* the next bit after the last actual flag */
 	__NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1),
 };
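The flag records how the slabobj_ext vector was allocated so the free side can pick the matching free function. A hypothetical sketch of that check (the helper name is illustrative, and it assumes the existing OBJEXTS_FLAGS_MASK covers the new bit):

/*
 * Hypothetical free-side check: a vector allocated with kmalloc_nolock()
 * must be freed with kfree_nolock(), never plain kfree().
 */
static void example_free_obj_exts(unsigned long obj_exts)
{
	void *vec = (void *)(obj_exts & ~OBJEXTS_FLAGS_MASK);

	if (obj_exts & OBJEXTS_NOSPIN_ALLOC)
		kfree_nolock(vec);
	else
		kfree(vec);
}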

include/linux/slab.h

Lines changed: 4 additions & 0 deletions
@@ -501,6 +501,7 @@ void * __must_check krealloc_noprof(const void *objp, size_t new_size,
 #define krealloc(...)	alloc_hooks(krealloc_noprof(__VA_ARGS__))
 
 void kfree(const void *objp);
+void kfree_nolock(const void *objp);
 void kfree_sensitive(const void *objp);
 size_t __ksize(const void *objp);
 
@@ -957,6 +958,9 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
 }
 #define kmalloc(...)	alloc_hooks(kmalloc_noprof(__VA_ARGS__))
 
+void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node);
+#define kmalloc_nolock(...)	alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__))
+
 #define kmem_buckets_alloc(_b, _size, _flags)	\
 	alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))

mm/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -194,6 +194,7 @@ menu "Slab allocator options"
 
 config SLUB
 	def_bool y
+	select IRQ_WORK
 
 config KVFREE_RCU_BATCHED
 	def_bool y

mm/kasan/common.c

Lines changed: 4 additions & 1 deletion
@@ -252,7 +252,7 @@ bool __kasan_slab_pre_free(struct kmem_cache *cache, void *object,
 }
 
 bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init,
-		       bool still_accessible)
+		       bool still_accessible, bool no_quarantine)
 {
 	if (!kasan_arch_is_ready() || is_kfence_address(object))
 		return false;
@@ -274,6 +274,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, bool init,
 
 	poison_slab_object(cache, object, init);
 
+	if (no_quarantine)
+		return false;
+
 	/*
 	 * If the object is put into quarantine, do not let slab put the object
 	 * onto the freelist for now. The object's metadata is kept until the

mm/slab.h

Lines changed: 6 additions & 0 deletions
@@ -57,6 +57,10 @@ struct slab {
 		struct {
 			union {
 				struct list_head slab_list;
+				struct { /* For deferred deactivate_slab() */
+					struct llist_node llnode;
+					void *flush_freelist;
+				};
 #ifdef CONFIG_SLUB_CPU_PARTIAL
 				struct {
 					struct slab *next;
@@ -662,6 +666,8 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
 void __check_heap_object(const void *ptr, unsigned long n,
 			 const struct slab *slab, bool to_user);
 
+void defer_free_barrier(void);
+
 static inline bool slub_debug_orig_size(struct kmem_cache *s)
 {
 	return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
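The llnode field and defer_free_barrier() are wired up in mm/slub.c, which this page does not show. As a rough sketch of the pattern the commit message describes (all names below are hypothetical), deferred frees chain objects onto a per-cpu lock-free list and drain it from irq_work context, where taking locks is safe again:

#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/percpu.h>

/* Hypothetical per-cpu deferral state. */
struct defer_free {
	struct llist_head objects;
	struct irq_work work;
};

static void free_deferred_objects(struct irq_work *work);

static DEFINE_PER_CPU(struct defer_free, defer_free_work) = {
	.objects	= LLIST_HEAD_INIT(objects),
	.work		= IRQ_WORK_INIT(free_deferred_objects),
};

/* Runs in irq_work (hard irq) context, outside the locked region. */
static void free_deferred_objects(struct irq_work *work)
{
	struct defer_free *df = container_of(work, struct defer_free, work);
	struct llist_node *pos, *t;

	llist_for_each_safe(pos, t, llist_del_all(&df->objects)) {
		/* ... resolve 'pos' back to its slab/cache and free it ... */
	}
}

/*
 * kfree_nolock() side: the freed object's own memory holds the
 * llist_node while the free is pending.
 */
static void defer_free(void *object)
{
	struct defer_free *df = this_cpu_ptr(&defer_free_work);

	/* llist_add() returns true if the list was empty: queue once. */
	if (llist_add((struct llist_node *)object, &df->objects))
		irq_work_queue(&df->work);
}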

mm/slab_common.c

Lines changed: 3 additions & 0 deletions
@@ -510,6 +510,9 @@ void kmem_cache_destroy(struct kmem_cache *s)
 		rcu_barrier();
 	}
 
+	/* Wait for deferred work from kmalloc/kfree_nolock() */
+	defer_free_barrier();
+
 	cpus_read_lock();
 	mutex_lock(&slab_mutex);
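defer_free_barrier() must guarantee that no deferred free is still in flight by the time the cache is torn down. Continuing the hypothetical sketch above (the real body is in mm/slub.c, not shown here), a plausible shape is an irq_work_sync() across all CPUs:

/* Hypothetical, matching the per-cpu 'defer_free_work' sketched above. */
void defer_free_barrier(void)
{
	int cpu;

	/* Wait until every CPU's pending irq_work callback has completed. */
	for_each_possible_cpu(cpu)
		irq_work_sync(&per_cpu_ptr(&defer_free_work, cpu)->work);
}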
