Skip to content

Commit 49d5377

Browse files
committed
rcu, slab: use a regular callback function for kvfree_rcu
RCU has been special-casing callback function pointers that are integers lower than 4096 as offsets of rcu_head for kvfree() instead. The tree RCU implementation no longer does that as the batched kvfree_rcu() is not a simple call_rcu(). The tiny RCU still does, and the plan is also to make tree RCU use call_rcu() for SLUB_TINY configurations.

Instead of teaching tree RCU again to special-case the offsets, let's remove the special casing completely. Since there's no SLOB anymore, it is possible to create a callback function that can take a pointer to a middle of slab object with unknown offset and determine the object's pointer before freeing it, so implement that as kvfree_rcu_cb().

Large kmalloc and vmalloc allocations are handled simply by aligning down to page size. For that we retain the requirement that the offset is smaller than 4096. But we can remove __is_kvfree_rcu_offset() completely and instead just opencode the condition in the BUILD_BUG_ON() check.

Reviewed-by: Joel Fernandes (Google) <[email protected]>
Reviewed-by: Hyeonggon Yoo <[email protected]>
Tested-by: Paul E. McKenney <[email protected]>
Signed-off-by: Vlastimil Babka <[email protected]>
1 parent 7f4b19e commit 49d5377

File tree

5 files changed

+63
-32
lines changed

5 files changed

+63
-32
lines changed

include/linux/rcupdate.h

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,12 +1025,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
10251025
#define RCU_POINTER_INITIALIZER(p, v) \
10261026
.p = RCU_INITIALIZER(v)
10271027

1028-
/*
1029-
* Does the specified offset indicate that the corresponding rcu_head
1030-
* structure can be handled by kvfree_rcu()?
1031-
*/
1032-
#define __is_kvfree_rcu_offset(offset) ((offset) < 4096)
1033-
10341028
/**
10351029
* kfree_rcu() - kfree an object after a grace period.
10361030
* @ptr: pointer to kfree for double-argument invocations.
@@ -1041,11 +1035,11 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
10411035
* when they are used in a kernel module, that module must invoke the
10421036
* high-latency rcu_barrier() function at module-unload time.
10431037
*
1044-
* The kfree_rcu() function handles this issue. Rather than encoding a
1045-
* function address in the embedded rcu_head structure, kfree_rcu() instead
1046-
* encodes the offset of the rcu_head structure within the base structure.
1047-
* Because the functions are not allowed in the low-order 4096 bytes of
1048-
* kernel virtual memory, offsets up to 4095 bytes can be accommodated.
1038+
* The kfree_rcu() function handles this issue. In order to have a universal
1039+
* callback function handling different offsets of rcu_head, the callback needs
1040+
* to determine the starting address of the freed object, which can be a large
1041+
* kmalloc or vmalloc allocation. To allow simply aligning the pointer down to
1042+
* page boundary for those, only offsets up to 4095 bytes can be accommodated.
10491043
* If the offset is larger than 4095 bytes, a compile-time error will
10501044
* be generated in kvfree_rcu_arg_2(). If this error is triggered, you can
10511045
* either fall back to use of call_rcu() or rearrange the structure to
@@ -1087,14 +1081,18 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
10871081
*/
10881082
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
10891083

/*
 * The BUILD_BUG_ON() makes sure the rcu_head offset can be handled. See the
 * comment of kfree_rcu() for details. A NULL @ptr is a no-op, matching
 * kvfree() semantics.
 */
#define kvfree_rcu_arg_2(ptr, rhf)					\
do {									\
	typeof (ptr) ___p = (ptr);					\
									\
	if (___p) {							\
		BUILD_BUG_ON(offsetof(typeof(*(ptr)), rhf) >= 4096);	\
		kvfree_call_rcu(&((___p)->rhf), (void *) (___p));	\
	}								\
} while (0)
10991097

11001098
#define kvfree_rcu_arg_1(ptr) \

kernel/rcu/tiny.c

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -85,15 +85,8 @@ void rcu_sched_clock_irq(int user)
8585
static inline bool rcu_reclaim_tiny(struct rcu_head *head)
8686
{
8787
rcu_callback_t f;
88-
unsigned long offset = (unsigned long)head->func;
8988

9089
rcu_lock_acquire(&rcu_callback_map);
91-
if (__is_kvfree_rcu_offset(offset)) {
92-
trace_rcu_invoke_kvfree_callback("", head, offset);
93-
kvfree((void *)head - offset);
94-
rcu_lock_release(&rcu_callback_map);
95-
return true;
96-
}
9790

9891
trace_rcu_invoke_callback("", head);
9992
f = head->func;
@@ -159,10 +152,6 @@ void synchronize_rcu(void)
159152
}
160153
EXPORT_SYMBOL_GPL(synchronize_rcu);
161154

162-
static void tiny_rcu_leak_callback(struct rcu_head *rhp)
163-
{
164-
}
165-
166155
/*
167156
* Post an RCU callback to be invoked after the end of an RCU grace
168157
* period. But since we have but one CPU, that would be after any
@@ -178,9 +167,6 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
178167
pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func);
179168
mem_dump_obj(head);
180169
}
181-
182-
if (!__is_kvfree_rcu_offset((unsigned long)head->func))
183-
WRITE_ONCE(head->func, tiny_rcu_leak_callback);
184170
return;
185171
}
186172

mm/slab.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,8 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
604604
void **p, int objects, struct slabobj_ext *obj_exts);
605605
#endif
606606

607+
void kvfree_rcu_cb(struct rcu_head *head);
608+
607609
size_t __ksize(const void *objp);
608610

609611
static inline size_t slab_ksize(const struct kmem_cache *s)

mm/slab_common.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,7 +1290,7 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
12901290
{
12911291
if (head) {
12921292
kasan_record_aux_stack(ptr);
1293-
call_rcu(head, (rcu_callback_t) ((void *) head - ptr));
1293+
call_rcu(head, kvfree_rcu_cb);
12941294
return;
12951295
}
12961296

@@ -1551,8 +1551,7 @@ kvfree_rcu_list(struct rcu_head *head)
15511551
rcu_lock_acquire(&rcu_callback_map);
15521552
trace_rcu_invoke_kvfree_callback("slab", head, offset);
15531553

1554-
if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset)))
1555-
kvfree(ptr);
1554+
kvfree(ptr);
15561555

15571556
rcu_lock_release(&rcu_callback_map);
15581557
cond_resched_tasks_rcu_qs();

mm/slub.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <linux/bitops.h>
2020
#include <linux/slab.h>
2121
#include "slab.h"
22+
#include <linux/vmalloc.h>
2223
#include <linux/proc_fs.h>
2324
#include <linux/seq_file.h>
2425
#include <linux/kasan.h>
@@ -4728,6 +4729,51 @@ static void free_large_kmalloc(struct folio *folio, void *object)
47284729
folio_put(folio);
47294730
}
47304731

4732+
/*
4733+
* Given an rcu_head embedded within an object obtained from kvmalloc at an
4734+
* offset < 4k, free the object in question.
4735+
*/
4736+
void kvfree_rcu_cb(struct rcu_head *head)
4737+
{
4738+
void *obj = head;
4739+
struct folio *folio;
4740+
struct slab *slab;
4741+
struct kmem_cache *s;
4742+
void *slab_addr;
4743+
4744+
if (is_vmalloc_addr(obj)) {
4745+
obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj);
4746+
vfree(obj);
4747+
return;
4748+
}
4749+
4750+
folio = virt_to_folio(obj);
4751+
if (!folio_test_slab(folio)) {
4752+
/*
4753+
* rcu_head offset can be only less than page size so no need to
4754+
* consider folio order
4755+
*/
4756+
obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj);
4757+
free_large_kmalloc(folio, obj);
4758+
return;
4759+
}
4760+
4761+
slab = folio_slab(folio);
4762+
s = slab->slab_cache;
4763+
slab_addr = folio_address(folio);
4764+
4765+
if (is_kfence_address(obj)) {
4766+
obj = kfence_object_start(obj);
4767+
} else {
4768+
unsigned int idx = __obj_to_index(s, slab_addr, obj);
4769+
4770+
obj = slab_addr + s->size * idx;
4771+
obj = fixup_red_left(s, obj);
4772+
}
4773+
4774+
slab_free(s, slab, obj, _RET_IP_);
4775+
}
4776+
47314777
/**
47324778
* kfree - free previously allocated memory
47334779
* @object: pointer returned by kmalloc() or kmem_cache_alloc()

0 commit comments

Comments
 (0)