Commit ca74b8c

Merge series "slab: Re-entrant kmalloc_nolock()"
From the cover letter [1]: This patch set introduces kmalloc_nolock(), the next logical step towards any-context allocation, which is necessary to remove bpf_mem_alloc and get rid of the preallocation requirement in the BPF infrastructure. In production, BPF maps grew to gigabytes in size, and preallocation wastes memory. Allocating from any context addresses this issue for BPF and for other subsystems that are forced to preallocate too. This long task started with the introduction of alloc_pages_nolock(); then memcg and objcg were converted to operate from any context, including NMI. This set completes the task with kmalloc_nolock(), which builds on top of alloc_pages_nolock() and the memcg changes. After that, the BPF subsystem will gradually adopt it everywhere. Link: https://lore.kernel.org/all/[email protected]/ [1]
2 parents b912061 + af92793 commit ca74b8c
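To make the intent of the new API concrete, here is a minimal, hypothetical usage sketch (not taken from this series): kmalloc_nolock() and kfree_nolock() are the declarations added to include/linux/slab.h below, while the surrounding structure and helpers are invented for illustration. The allocation may fail instead of sleeping or spinning on locks, so callers must tolerate NULL.

#include <linux/slab.h>
#include <linux/smp.h>

/* Hypothetical record filled in from a tracing/NMI-style callback. */
struct any_ctx_record {
	u64 ts;
	u32 cpu;
};

static struct any_ctx_record *record_alloc(u64 ts)
{
	struct any_ctx_record *rec;

	/* Intended to be callable from any context, including NMI;
	 * returns NULL rather than blocking or spinning on locks.
	 */
	rec = kmalloc_nolock(sizeof(*rec), __GFP_ZERO, NUMA_NO_NODE);
	if (!rec)
		return NULL;

	rec->ts = ts;
	rec->cpu = raw_smp_processor_id();
	return rec;
}

static void record_free(struct any_ctx_record *rec)
{
	/* Pairs with kmalloc_nolock(); also meant for restricted contexts. */
	kfree_nolock(rec);
}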

17 files changed: +571 -93 lines

include/linux/gfp.h

Lines changed: 1 addition & 1 deletion
@@ -354,7 +354,7 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp,
 }
 #define alloc_page_vma(...)		alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__))
 
-struct page *alloc_pages_nolock_noprof(int nid, unsigned int order);
+struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order);
 #define alloc_pages_nolock(...)		alloc_hooks(alloc_pages_nolock_noprof(__VA_ARGS__))
 
 extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order);

include/linux/kasan.h

Lines changed: 8 additions & 5 deletions
@@ -200,7 +200,7 @@ static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s,
 }
 
 bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
-		       bool still_accessible);
+		       bool still_accessible, bool no_quarantine);
 /**
  * kasan_slab_free - Poison, initialize, and quarantine a slab object.
  * @object: Object to be freed.
@@ -226,11 +226,13 @@ bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init,
  * @Return true if KASAN took ownership of the object; false otherwise.
  */
 static __always_inline bool kasan_slab_free(struct kmem_cache *s,
-						void *object, bool init,
-						bool still_accessible)
+						void *object, bool init,
+						bool still_accessible,
+						bool no_quarantine)
 {
 	if (kasan_enabled())
-		return __kasan_slab_free(s, object, init, still_accessible);
+		return __kasan_slab_free(s, object, init, still_accessible,
+					 no_quarantine);
 	return false;
 }
 
@@ -427,7 +429,8 @@ static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object)
 }
 
 static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
-				   bool init, bool still_accessible)
+				   bool init, bool still_accessible,
+				   bool no_quarantine)
 {
 	return false;
 }

include/linux/local_lock.h

Lines changed: 2 additions & 0 deletions
@@ -66,6 +66,8 @@
  */
 #define local_trylock(lock)		__local_trylock(this_cpu_ptr(lock))
 
+#define local_lock_is_locked(lock)	__local_lock_is_locked(lock)
+
 /**
  * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable
  *			   interrupts if acquired

include/linux/local_lock_internal.h

Lines changed: 7 additions & 0 deletions
@@ -165,6 +165,9 @@ do {								\
 		!!tl;						\
 	})
 
+/* preemption or migration must be disabled before calling __local_lock_is_locked */
+#define __local_lock_is_locked(lock)	READ_ONCE(this_cpu_ptr(lock)->acquired)
+
 #define __local_lock_release(lock)				\
 	do {							\
 		local_trylock_t *tl;				\
@@ -285,4 +288,8 @@ do {								\
 		__local_trylock(lock);				\
 	})
 
+/* migration must be disabled before calling __local_lock_is_locked */
+#define __local_lock_is_locked(__lock)				\
+	(rt_mutex_owner(&this_cpu_ptr(__lock)->lock) == current)
+
 #endif /* CONFIG_PREEMPT_RT */
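The new local_lock_is_locked() helper (mapped onto the two definitions above) lets re-entrant code detect that the current CPU already holds a per-CPU lock and back off instead of deadlocking, which is exactly the kind of check a re-entrant allocator needs. A rough sketch under stated assumptions: my_cache, its local_trylock_t member, and my_cache_charge() are hypothetical; only the local_lock_is_locked()/local_lock()/local_unlock() calls are real API, and preemption or migration is assumed to be disabled around the check, as the comments above require.

#include <linux/local_lock.h>
#include <linux/percpu.h>

/* Hypothetical per-CPU cache guarded by a local_trylock_t. */
struct my_pcpu_cache {
	local_trylock_t	lock;
	int		nr_cached;
};

static DEFINE_PER_CPU(struct my_pcpu_cache, my_cache) = {
	.lock = INIT_LOCAL_TRYLOCK(lock),
};

/*
 * May be reached re-entrantly, e.g. from an NMI that fires while this
 * CPU is already inside the locked region below.  Returns false when
 * the caller must fall back to some deferred path.
 */
static bool my_cache_charge(void)
{
	/* Preemption/migration is assumed to be disabled by the caller. */
	if (local_lock_is_locked(&my_cache.lock))
		return false;	/* we interrupted ourselves: don't deadlock */

	local_lock(&my_cache.lock);
	this_cpu_ptr(&my_cache)->nr_cached++;
	local_unlock(&my_cache.lock);
	return true;
}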

include/linux/memcontrol.h

Lines changed: 10 additions & 2 deletions
@@ -341,17 +341,25 @@ enum page_memcg_data_flags {
 	__NR_MEMCG_DATA_FLAGS	= (1UL << 2),
 };
 
+#define __OBJEXTS_ALLOC_FAIL	MEMCG_DATA_OBJEXTS
 #define __FIRST_OBJEXT_FLAG	__NR_MEMCG_DATA_FLAGS
 
 #else /* CONFIG_MEMCG */
 
+#define __OBJEXTS_ALLOC_FAIL	(1UL << 0)
 #define __FIRST_OBJEXT_FLAG	(1UL << 0)
 
 #endif /* CONFIG_MEMCG */
 
 enum objext_flags {
-	/* slabobj_ext vector failed to allocate */
-	OBJEXTS_ALLOC_FAIL = __FIRST_OBJEXT_FLAG,
+	/*
+	 * Use bit 0 with zero other bits to signal that slabobj_ext vector
+	 * failed to allocate. The same bit 0 with valid upper bits means
+	 * MEMCG_DATA_OBJEXTS.
+	 */
+	OBJEXTS_ALLOC_FAIL	= __OBJEXTS_ALLOC_FAIL,
+	/* slabobj_ext vector allocated with kmalloc_nolock() */
+	OBJEXTS_NOSPIN_ALLOC	= __FIRST_OBJEXT_FLAG,
 	/* the next bit after the last actual flag */
 	__NR_OBJEXTS_FLAGS	= (__FIRST_OBJEXT_FLAG << 1),
 };
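In other words, under CONFIG_MEMCG the OBJEXTS_ALLOC_FAIL marker now reuses bit 0 of MEMCG_DATA_OBJEXTS and is distinguished only by the absence of any upper (pointer) bits, which frees the old __FIRST_OBJEXT_FLAG bit for OBJEXTS_NOSPIN_ALLOC, i.e. for remembering that the slabobj_ext vector was allocated with kmalloc_nolock(). A minimal sketch of how a reader of such a field could tell the cases apart, using only the flags defined above (the helpers themselves are illustrative, not code from this series):

#include <linux/memcontrol.h>

/* Illustrative helpers; obj_exts is the raw per-slab extension word. */
static inline bool objexts_alloc_failed(unsigned long obj_exts)
{
	/* Bit 0 set and no other bits: the vector failed to allocate. */
	return obj_exts == OBJEXTS_ALLOC_FAIL;
}

static inline bool objexts_nospin_allocated(unsigned long obj_exts)
{
	/* Vector was allocated with kmalloc_nolock(), so the free path
	 * presumably has to release it with kfree_nolock().
	 */
	return obj_exts & OBJEXTS_NOSPIN_ALLOC;
}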

include/linux/rtmutex.h

Lines changed: 10 additions & 0 deletions
@@ -44,6 +44,16 @@ static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock)
 	return READ_ONCE(lock->owner) != NULL;
 }
 
+#ifdef CONFIG_RT_MUTEXES
+#define RT_MUTEX_HAS_WAITERS	1UL
+
+static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock)
+{
+	unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
+
+	return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS);
+}
+#endif
 extern void rt_mutex_base_init(struct rt_mutex_base *rtb);
 
 /**

include/linux/slab.h

Lines changed: 4 additions & 0 deletions
@@ -501,6 +501,7 @@ void * __must_check krealloc_noprof(const void *objp, size_t new_size,
 #define krealloc(...)			alloc_hooks(krealloc_noprof(__VA_ARGS__))
 
 void kfree(const void *objp);
+void kfree_nolock(const void *objp);
 void kfree_sensitive(const void *objp);
 size_t __ksize(const void *objp);
 
@@ -957,6 +958,9 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
 }
 #define kmalloc(...)			alloc_hooks(kmalloc_noprof(__VA_ARGS__))
 
+void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node);
+#define kmalloc_nolock(...)		alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__))
+
 #define kmem_buckets_alloc(_b, _size, _flags)	\
 	alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))

kernel/bpf/stream.c

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@ static struct bpf_stream_page *bpf_stream_page_replace(void)
 	struct bpf_stream_page *stream_page, *old_stream_page;
 	struct page *page;
 
-	page = alloc_pages_nolock(NUMA_NO_NODE, 0);
+	page = alloc_pages_nolock(/* Don't account */ 0, NUMA_NO_NODE, 0);
 	if (!page)
 		return NULL;
 	stream_page = page_address(page);

kernel/bpf/syscall.c

Lines changed: 1 addition & 1 deletion
@@ -581,7 +581,7 @@ static bool can_alloc_pages(void)
 static struct page *__bpf_alloc_page(int nid)
 {
 	if (!can_alloc_pages())
-		return alloc_pages_nolock(nid, 0);
+		return alloc_pages_nolock(__GFP_ACCOUNT, nid, 0);
 
 	return alloc_pages_node(nid,
 				GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT

kernel/locking/rtmutex_common.h

Lines changed: 0 additions & 9 deletions
@@ -153,15 +153,6 @@ static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p)
 			pi_tree.entry);
 }
 
-#define RT_MUTEX_HAS_WAITERS	1UL
-
-static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock)
-{
-	unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
-
-	return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS);
-}
-
 /*
  * Constants for rt mutex functions which have a selectable deadlock
  * detection.
