
Commit 05b00ff

Merge tag 'slab-for-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull slab updates from Vlastimil Babka:

 - Move the TINY_RCU kvfree_rcu() implementation from RCU to SLAB
   subsystem and cleanup its integration (Vlastimil Babka)

   Following the move of the TREE_RCU batching kvfree_rcu()
   implementation in 6.14, move also the simpler TINY_RCU variant.
   Refactor the #ifdef guards so that the simple implementation is also
   used with SLUB_TINY. Remove the need for RCU to recognize fake
   callback function pointers (__is_kvfree_rcu_offset()) when handling
   call_rcu() by implementing a callback that calculates the object's
   address from the embedded rcu_head address without knowing its
   offset.

 - Improve kmalloc cache randomization in kvmalloc (GONG Ruiqi)

   Due to an extra layer of function call, all kvmalloc() allocations
   used the same set of random caches. Thanks to moving the kvmalloc()
   implementation to slub.c, this is improved and randomization now
   works for kvmalloc.

 - Various improvements to debugging, testing and other cleanups
   (Hyesoo Yu, Lilith Gkini, Uladzislau Rezki, Matthew Wilcox, Kevin
   Brodsky, Ye Bin)

* tag 'slab-for-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  slub: Handle freelist cycle in on_freelist()
  mm/slab: call kmalloc_noprof() unconditionally in kmalloc_array_noprof()
  slab: Mark large folios for debugging purposes
  kunit, slub: Add test_kfree_rcu_wq_destroy use case
  mm, slab: cleanup slab_bug() parameters
  mm: slub: call WARN() when detecting a slab corruption
  mm: slub: Print the broken data before restoring them
  slab: Achieve better kmalloc caches randomization in kvmalloc
  slab: Adjust placement of __kvmalloc_node_noprof
  mm/slab: simplify SLAB_* flag handling
  slab: don't batch kvfree_rcu() with SLUB_TINY
  rcu, slab: use a regular callback function for kvfree_rcu
  rcu: remove trace_rcu_kvfree_callback
  slab, rcu: move TINY_RCU variant of kvfree_rcu() to SLAB
2 parents 95c61e1 + dea2d92 commit 05b00ff
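The pull message above describes replacing offset-encoded fake callback pointers with a regular callback that recovers the object's address from the embedded rcu_head. Below is a minimal illustrative sketch of that recovery for page-aligned (large kmalloc/vmalloc) objects; the function name is hypothetical and this is not the kernel's actual callback — slab-backed objects are instead resolved through the allocator's own object metadata.

/*
 * Hypothetical sketch, not this commit's implementation: recover the start
 * of a page-aligned allocation from the address of an rcu_head embedded
 * within its first 4096 bytes. Because kvfree_rcu() rejects offsets >= 4096
 * at compile time, aligning the rcu_head pointer down to a page boundary
 * lands on the object's first byte, so no per-callsite offset is needed.
 */
static void kvfree_rcu_page_aligned_cb(struct rcu_head *head)
{
	void *obj = (void *)((unsigned long)head & PAGE_MASK);

	kvfree(obj);
}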

File tree

14 files changed, +430 −383 lines changed


include/linux/page-flags.h

Lines changed: 10 additions & 8 deletions
@@ -925,14 +925,15 @@ FOLIO_FLAG_FALSE(has_hwpoisoned)
 enum pagetype {
 	/* 0x00-0x7f are positive numbers, ie mapcount */
 	/* Reserve 0x80-0xef for mapcount overflow. */
-	PGTY_buddy = 0xf0,
-	PGTY_offline = 0xf1,
-	PGTY_table = 0xf2,
-	PGTY_guard = 0xf3,
-	PGTY_hugetlb = 0xf4,
-	PGTY_slab = 0xf5,
-	PGTY_zsmalloc = 0xf6,
-	PGTY_unaccepted = 0xf7,
+	PGTY_buddy = 0xf0,
+	PGTY_offline = 0xf1,
+	PGTY_table = 0xf2,
+	PGTY_guard = 0xf3,
+	PGTY_hugetlb = 0xf4,
+	PGTY_slab = 0xf5,
+	PGTY_zsmalloc = 0xf6,
+	PGTY_unaccepted = 0xf7,
+	PGTY_large_kmalloc = 0xf8,

 	PGTY_mapcount_underflow = 0xff
 };
@@ -1075,6 +1076,7 @@ PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
  * Serialized with zone lock.
  */
 PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted)
+FOLIO_TYPE_OPS(large_kmalloc, large_kmalloc)

 /**
  * PageHuge - Determine if the page belongs to hugetlbfs
include/linux/rcupdate.h

Lines changed: 18 additions & 15 deletions
@@ -1025,12 +1025,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #define RCU_POINTER_INITIALIZER(p, v) \
 		.p = RCU_INITIALIZER(v)

-/*
- * Does the specified offset indicate that the corresponding rcu_head
- * structure can be handled by kvfree_rcu()?
- */
-#define __is_kvfree_rcu_offset(offset) ((offset) < 4096)
-
 /**
  * kfree_rcu() - kfree an object after a grace period.
  * @ptr: pointer to kfree for double-argument invocations.
@@ -1041,11 +1035,11 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
  * when they are used in a kernel module, that module must invoke the
  * high-latency rcu_barrier() function at module-unload time.
  *
- * The kfree_rcu() function handles this issue. Rather than encoding a
- * function address in the embedded rcu_head structure, kfree_rcu() instead
- * encodes the offset of the rcu_head structure within the base structure.
- * Because the functions are not allowed in the low-order 4096 bytes of
- * kernel virtual memory, offsets up to 4095 bytes can be accommodated.
+ * The kfree_rcu() function handles this issue. In order to have a universal
+ * callback function handling different offsets of rcu_head, the callback needs
+ * to determine the starting address of the freed object, which can be a large
+ * kmalloc or vmalloc allocation. To allow simply aligning the pointer down to
+ * page boundary for those, only offsets up to 4095 bytes can be accommodated.
  * If the offset is larger than 4095 bytes, a compile-time error will
  * be generated in kvfree_rcu_arg_2(). If this error is triggered, you can
  * either fall back to use of call_rcu() or rearrange the structure to
@@ -1082,14 +1076,23 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #define kfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
 #define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)

+/*
+ * In mm/slab_common.c, no suitable header to include here.
+ */
+void kvfree_call_rcu(struct rcu_head *head, void *ptr);
+
+/*
+ * The BUILD_BUG_ON() makes sure the rcu_head offset can be handled. See the
+ * comment of kfree_rcu() for details.
+ */
 #define kvfree_rcu_arg_2(ptr, rhf) \
 do { \
 	typeof (ptr) ___p = (ptr); \
 \
-	if (___p) { \
-		BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \
-		kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \
-	} \
+	if (___p) { \
+		BUILD_BUG_ON(offsetof(typeof(*(ptr)), rhf) >= 4096); \
+		kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \
+	} \
 } while (0)

 #define kvfree_rcu_arg_1(ptr)
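For reference, a minimal caller-side sketch of the two-argument form whose offset the BUILD_BUG_ON() above checks; the struct and function names are hypothetical, not part of this commit.

/* Hypothetical caller-side sketch. */
struct my_node {
	long payload;
	struct rcu_head rcu;	/* offset of 'rcu' must stay below 4096 bytes */
};

static void drop_node(struct my_node *node)
{
	/*
	 * Expands to kvfree_rcu_arg_2(): the offset of 'rcu' within struct
	 * my_node is checked at compile time, and the object is kvfree()d
	 * after a grace period by the common callback.
	 */
	kvfree_rcu(node, rcu);
}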

include/linux/rcutiny.h

Lines changed: 0 additions & 36 deletions
@@ -90,41 +90,6 @@ static inline void synchronize_rcu_expedited(void)
 	synchronize_rcu();
 }

-/*
- * Add one more declaration of kvfree() here. It is
- * not so straight forward to just include <linux/mm.h>
- * where it is defined due to getting many compile
- * errors caused by that include.
- */
-extern void kvfree(const void *addr);
-
-static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr)
-{
-	if (head) {
-		call_rcu(head, (rcu_callback_t) ((void *) head - ptr));
-		return;
-	}
-
-	// kvfree_rcu(one_arg) call.
-	might_sleep();
-	synchronize_rcu();
-	kvfree(ptr);
-}
-
-static inline void kvfree_rcu_barrier(void)
-{
-	rcu_barrier();
-}
-
-#ifdef CONFIG_KASAN_GENERIC
-void kvfree_call_rcu(struct rcu_head *head, void *ptr);
-#else
-static inline void kvfree_call_rcu(struct rcu_head *head, void *ptr)
-{
-	__kvfree_call_rcu(head, ptr);
-}
-#endif
-
 void rcu_qs(void);

 static inline void rcu_softirq_qs(void)
@@ -164,7 +129,6 @@ static inline void rcu_end_inkernel_boot(void) { }
 static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
 static inline bool rcu_is_watching(void) { return true; }
 static inline void rcu_momentary_eqs(void) { }
-static inline void kfree_rcu_scheduler_running(void) { }

 /* Avoid RCU read-side critical sections leaking across. */
 static inline void rcu_all_qs(void) { barrier(); }

include/linux/rcutree.h

Lines changed: 0 additions & 3 deletions
@@ -34,12 +34,9 @@ static inline void rcu_virt_note_context_switch(void)
 }

 void synchronize_rcu_expedited(void);
-void kvfree_call_rcu(struct rcu_head *head, void *ptr);
-void kvfree_rcu_barrier(void);

 void rcu_barrier(void);
 void rcu_momentary_eqs(void);
-void kfree_rcu_scheduler_running(void);

 struct rcu_gp_oldstate {
 	unsigned long rgos_norm;

include/linux/slab.h

Lines changed: 14 additions & 2 deletions
@@ -16,6 +16,7 @@
 #include <linux/gfp.h>
 #include <linux/overflow.h>
 #include <linux/types.h>
+#include <linux/rcupdate.h>
 #include <linux/workqueue.h>
 #include <linux/percpu-refcount.h>
 #include <linux/cleanup.h>
@@ -941,8 +942,6 @@ static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t siz

 	if (unlikely(check_mul_overflow(n, size, &bytes)))
 		return NULL;
-	if (__builtin_constant_p(n) && __builtin_constant_p(size))
-		return kmalloc_noprof(bytes, flags);
 	return kmalloc_noprof(bytes, flags);
 }
 #define kmalloc_array(...) alloc_hooks(kmalloc_array_noprof(__VA_ARGS__))
@@ -1082,6 +1081,19 @@ extern void kvfree_sensitive(const void *addr, size_t len);

 unsigned int kmem_cache_size(struct kmem_cache *s);

+#ifndef CONFIG_KVFREE_RCU_BATCHED
+static inline void kvfree_rcu_barrier(void)
+{
+	rcu_barrier();
+}
+
+static inline void kfree_rcu_scheduler_running(void) { }
+#else
+void kvfree_rcu_barrier(void);
+
+void kfree_rcu_scheduler_running(void);
+#endif
+
 /**
  * kmalloc_size_roundup - Report allocation bucket size for the given size
  *
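As a side note on the kmalloc_array_noprof() cleanup above (the constant-folding branch was redundant since both paths call kmalloc_noprof()), here is a minimal sketch of the same overflow-guarded sizing pattern in a hypothetical caller, not taken from this commit.

/* Hypothetical kernel-style sketch. */
static void *alloc_entries(size_t n, size_t entry_size, gfp_t flags)
{
	size_t bytes;

	/* check_mul_overflow() returns true if n * entry_size would wrap. */
	if (unlikely(check_mul_overflow(n, entry_size, &bytes)))
		return NULL;

	return kmalloc(bytes, flags);
}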

include/trace/events/rcu.h

Lines changed: 0 additions & 34 deletions
@@ -560,40 +560,6 @@ TRACE_EVENT_RCU(rcu_segcb_stats,

 );

-/*
- * Tracepoint for the registration of a single RCU callback of the special
- * kvfree() form. The first argument is the RCU type, the second argument
- * is a pointer to the RCU callback, the third argument is the offset
- * of the callback within the enclosing RCU-protected data structure,
- * the fourth argument is the number of lazy callbacks queued, and the
- * fifth argument is the total number of callbacks queued.
- */
-TRACE_EVENT_RCU(rcu_kvfree_callback,
-
-	TP_PROTO(const char *rcuname, struct rcu_head *rhp, unsigned long offset,
-		 long qlen),
-
-	TP_ARGS(rcuname, rhp, offset, qlen),
-
-	TP_STRUCT__entry(
-		__field(const char *, rcuname)
-		__field(void *, rhp)
-		__field(unsigned long, offset)
-		__field(long, qlen)
-	),
-
-	TP_fast_assign(
-		__entry->rcuname = rcuname;
-		__entry->rhp = rhp;
-		__entry->offset = offset;
-		__entry->qlen = qlen;
-	),
-
-	TP_printk("%s rhp=%p func=%ld %ld",
-		  __entry->rcuname, __entry->rhp, __entry->offset,
-		  __entry->qlen)
-);
-
 /*
  * Tracepoint for marking the beginning rcu_do_batch, performed to start
  * RCU callback invocation. The first argument is the RCU flavor,

kernel/rcu/tiny.c

Lines changed: 0 additions & 25 deletions
@@ -85,15 +85,8 @@ void rcu_sched_clock_irq(int user)
 static inline bool rcu_reclaim_tiny(struct rcu_head *head)
 {
 	rcu_callback_t f;
-	unsigned long offset = (unsigned long)head->func;

 	rcu_lock_acquire(&rcu_callback_map);
-	if (__is_kvfree_rcu_offset(offset)) {
-		trace_rcu_invoke_kvfree_callback("", head, offset);
-		kvfree((void *)head - offset);
-		rcu_lock_release(&rcu_callback_map);
-		return true;
-	}

 	trace_rcu_invoke_callback("", head);
 	f = head->func;
@@ -159,10 +152,6 @@ void synchronize_rcu(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);

-static void tiny_rcu_leak_callback(struct rcu_head *rhp)
-{
-}
-
 /*
  * Post an RCU callback to be invoked after the end of an RCU grace
  * period. But since we have but one CPU, that would be after any
@@ -178,9 +167,6 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
 			pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func);
 			mem_dump_obj(head);
 		}
-
-		if (!__is_kvfree_rcu_offset((unsigned long)head->func))
-			WRITE_ONCE(head->func, tiny_rcu_leak_callback);
 		return;
 	}

@@ -246,17 +232,6 @@ bool poll_state_synchronize_rcu(unsigned long oldstate)
 }
 EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);

-#ifdef CONFIG_KASAN_GENERIC
-void kvfree_call_rcu(struct rcu_head *head, void *ptr)
-{
-	if (head)
-		kasan_record_aux_stack(ptr);
-
-	__kvfree_call_rcu(head, ptr);
-}
-EXPORT_SYMBOL_GPL(kvfree_call_rcu);
-#endif
-
 void __init rcu_init(void)
 {
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);

kernel/rcu/tree.c

Lines changed: 2 additions & 7 deletions
@@ -2931,13 +2931,8 @@ static int __init rcu_spawn_core_kthreads(void)
 static void rcutree_enqueue(struct rcu_data *rdp, struct rcu_head *head, rcu_callback_t func)
 {
 	rcu_segcblist_enqueue(&rdp->cblist, head);
-	if (__is_kvfree_rcu_offset((unsigned long)func))
-		trace_rcu_kvfree_callback(rcu_state.name, head,
-					  (unsigned long)func,
-					  rcu_segcblist_n_cbs(&rdp->cblist));
-	else
-		trace_rcu_callback(rcu_state.name, head,
-				   rcu_segcblist_n_cbs(&rdp->cblist));
+	trace_rcu_callback(rcu_state.name, head,
+			   rcu_segcblist_n_cbs(&rdp->cblist));
 	trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued"));
 }

lib/tests/slub_kunit.c

Lines changed: 59 additions & 0 deletions
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/rcupdate.h>
+#include <linux/delay.h>
 #include "../mm/slab.h"

 static struct kunit_resource resource;
@@ -181,6 +182,63 @@ static void test_kfree_rcu(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, 0, slab_errors);
 }

+struct cache_destroy_work {
+	struct work_struct work;
+	struct kmem_cache *s;
+};
+
+static void cache_destroy_workfn(struct work_struct *w)
+{
+	struct cache_destroy_work *cdw;
+
+	cdw = container_of(w, struct cache_destroy_work, work);
+	kmem_cache_destroy(cdw->s);
+}
+
+#define KMEM_CACHE_DESTROY_NR 10
+
+static void test_kfree_rcu_wq_destroy(struct kunit *test)
+{
+	struct test_kfree_rcu_struct *p;
+	struct cache_destroy_work cdw;
+	struct workqueue_struct *wq;
+	struct kmem_cache *s;
+	unsigned int delay;
+	int i;
+
+	if (IS_BUILTIN(CONFIG_SLUB_KUNIT_TEST))
+		kunit_skip(test, "can't do kfree_rcu() when test is built-in");
+
+	INIT_WORK_ONSTACK(&cdw.work, cache_destroy_workfn);
+	wq = alloc_workqueue("test_kfree_rcu_destroy_wq",
+				WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+
+	if (!wq)
+		kunit_skip(test, "failed to alloc wq");
+
+	for (i = 0; i < KMEM_CACHE_DESTROY_NR; i++) {
+		s = test_kmem_cache_create("TestSlub_kfree_rcu_wq_destroy",
+				sizeof(struct test_kfree_rcu_struct),
+				SLAB_NO_MERGE);
+
+		if (!s)
+			kunit_skip(test, "failed to create cache");
+
+		delay = get_random_u8();
+		p = kmem_cache_alloc(s, GFP_KERNEL);
+		kfree_rcu(p, rcu);
+
+		cdw.s = s;
+
+		msleep(delay);
+		queue_work(wq, &cdw.work);
+		flush_work(&cdw.work);
+	}
+
+	destroy_workqueue(wq);
+	KUNIT_EXPECT_EQ(test, 0, slab_errors);
+}
+
 static void test_leak_destroy(struct kunit *test)
 {
 	struct kmem_cache *s = test_kmem_cache_create("TestSlub_leak_destroy",
@@ -254,6 +312,7 @@ static struct kunit_case test_cases[] = {
 	KUNIT_CASE(test_clobber_redzone_free),
 	KUNIT_CASE(test_kmalloc_redzone_access),
 	KUNIT_CASE(test_kfree_rcu),
+	KUNIT_CASE(test_kfree_rcu_wq_destroy),
 	KUNIT_CASE(test_leak_destroy),
 	KUNIT_CASE(test_krealloc_redzone_zeroing),
 	{}

mm/Kconfig

Lines changed: 4 additions & 0 deletions
@@ -242,6 +242,10 @@ menu "Slab allocator options"
 config SLUB
 	def_bool y

+config KVFREE_RCU_BATCHED
+	def_bool y
+	depends on !SLUB_TINY && !TINY_RCU
+
 config SLUB_TINY
 	bool "Configure for minimal memory footprint"
 	depends on EXPERT
