Skip to content

Commit b3570b0

Browse files
committed
Merge tag 'locking-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar: "Futexes: - Add support for task local hash maps (Sebastian Andrzej Siewior, Peter Zijlstra) - Implement the FUTEX2_NUMA ABI, which extends the futex interface to be NUMA-aware. On NUMA-aware futexes a second u32 word containing the NUMA node is added after the u32 futex value word (Peter Zijlstra) - Implement the FUTEX2_MPOL ABI, which extends the futex interface to be mempolicy-aware as well, to further refine futex node mappings and lookups (Peter Zijlstra) Locking primitives: - Misc cleanups (Andy Shevchenko, Borislav Petkov, Colin Ian King, Ingo Molnar, Nam Cao, Peter Zijlstra) Lockdep: - Prevent abuse of lockdep subclasses (Waiman Long) - Add number of dynamic keys to /proc/lockdep_stats (Waiman Long) Plus misc cleanups and fixes" * tag 'locking-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (35 commits) selftests/futex: Fix spelling mistake "unitiliazed" -> "uninitialized" futex: Correct the kernedoc return value for futex_wait_setup(). tools headers: Synchronize prctl.h ABI header futex: Use RCU_INIT_POINTER() in futex_mm_init(). selftests/futex: Use TAP output in futex_numa_mpol selftests/futex: Use TAP output in futex_priv_hash futex: Fix kernel-doc comments futex: Relax the rcu_assign_pointer() assignment of mm->futex_phash in futex_mm_init() futex: Fix outdated comment in struct restart_block locking/lockdep: Add number of dynamic keys to /proc/lockdep_stats locking/lockdep: Prevent abuse of lockdep subclass locking/lockdep: Move hlock_equal() to the respective #ifdeffery futex,selftests: Add another FUTEX2_NUMA selftest selftests/futex: Add futex_numa_mpol selftests/futex: Add futex_priv_hash selftests/futex: Build without headers nonsense tools/perf: Allow to select the number of hash buckets tools headers: Synchronize prctl.h ABI header futex: Implement FUTEX2_MPOL futex: Implement FUTEX2_NUMA ...
2 parents 3349ada + 94ec708 commit b3570b0

38 files changed

+2523
-599
lines changed

include/linux/futex.h

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44

55
#include <linux/sched.h>
66
#include <linux/ktime.h>
7+
#include <linux/mm_types.h>
78

89
#include <uapi/linux/futex.h>
910

1011
struct inode;
11-
struct mm_struct;
1212
struct task_struct;
1313

1414
/*
@@ -34,6 +34,7 @@ union futex_key {
3434
u64 i_seq;
3535
unsigned long pgoff;
3636
unsigned int offset;
37+
/* unsigned int node; */
3738
} shared;
3839
struct {
3940
union {
@@ -42,11 +43,13 @@ union futex_key {
4243
};
4344
unsigned long address;
4445
unsigned int offset;
46+
/* unsigned int node; */
4547
} private;
4648
struct {
4749
u64 ptr;
4850
unsigned long word;
4951
unsigned int offset;
52+
unsigned int node; /* NOT hashed! */
5053
} both;
5154
};
5255

@@ -77,7 +80,25 @@ void futex_exec_release(struct task_struct *tsk);
7780

7881
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
7982
u32 __user *uaddr2, u32 val2, u32 val3);
80-
#else
83+
int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4);
84+
85+
#ifdef CONFIG_FUTEX_PRIVATE_HASH
86+
int futex_hash_allocate_default(void);
87+
void futex_hash_free(struct mm_struct *mm);
88+
89+
static inline void futex_mm_init(struct mm_struct *mm)
90+
{
91+
RCU_INIT_POINTER(mm->futex_phash, NULL);
92+
mutex_init(&mm->futex_hash_lock);
93+
}
94+
95+
#else /* !CONFIG_FUTEX_PRIVATE_HASH */
96+
static inline int futex_hash_allocate_default(void) { return 0; }
97+
static inline void futex_hash_free(struct mm_struct *mm) { }
98+
static inline void futex_mm_init(struct mm_struct *mm) { }
99+
#endif /* CONFIG_FUTEX_PRIVATE_HASH */
100+
101+
#else /* !CONFIG_FUTEX */
81102
static inline void futex_init_task(struct task_struct *tsk) { }
82103
static inline void futex_exit_recursive(struct task_struct *tsk) { }
83104
static inline void futex_exit_release(struct task_struct *tsk) { }
@@ -88,6 +109,17 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
88109
{
89110
return -EINVAL;
90111
}
112+
static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
113+
{
114+
return -EINVAL;
115+
}
116+
static inline int futex_hash_allocate_default(void)
117+
{
118+
return 0;
119+
}
120+
static inline void futex_hash_free(struct mm_struct *mm) { }
121+
static inline void futex_mm_init(struct mm_struct *mm) { }
122+
91123
#endif
92124

93125
#endif

include/linux/mm_types.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#define INIT_PASID 0
3232

3333
struct address_space;
34+
struct futex_private_hash;
3435
struct mem_cgroup;
3536

3637
/*
@@ -1031,7 +1032,11 @@ struct mm_struct {
10311032
*/
10321033
seqcount_t mm_lock_seq;
10331034
#endif
1034-
1035+
#ifdef CONFIG_FUTEX_PRIVATE_HASH
1036+
struct mutex futex_hash_lock;
1037+
struct futex_private_hash __rcu *futex_phash;
1038+
struct futex_private_hash *futex_phash_new;
1039+
#endif
10351040

10361041
unsigned long hiwater_rss; /* High-watermark of RSS usage */
10371042
unsigned long hiwater_vm; /* High-water virtual memory usage */

include/linux/mmap_lock.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <linux/rwsem.h>
88
#include <linux/tracepoint-defs.h>
99
#include <linux/types.h>
10+
#include <linux/cleanup.h>
1011

1112
#define MMAP_LOCK_INITIALIZER(name) \
1213
.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
@@ -211,6 +212,9 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
211212
up_read(&mm->mmap_lock);
212213
}
213214

215+
DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
216+
mmap_read_lock(_T), mmap_read_unlock(_T))
217+
214218
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
215219
{
216220
__mmap_lock_trace_released(mm, false);

include/linux/rcuref.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ static inline void rcuref_init(rcuref_t *ref, unsigned int cnt)
3030
* rcuref_read - Read the number of held reference counts of a rcuref
3131
* @ref: Pointer to the reference count
3232
*
33-
* Return: The number of held references (0 ... N)
33+
* Return: The number of held references (0 ... N). The value 0 does not
34+
* indicate that it is safe to schedule the object, protected by this reference
35+
* counter, for deconstruction.
36+
* If you want to know if the reference counter has been marked DEAD (as
37+
* signaled by rcuref_put()) please use rcuref_is_dead().
3438
*/
3539
static inline unsigned int rcuref_read(rcuref_t *ref)
3640
{
@@ -40,6 +44,22 @@ static inline unsigned int rcuref_read(rcuref_t *ref)
4044
return c >= RCUREF_RELEASED ? 0 : c + 1;
4145
}
4246

47+
/**
48+
* rcuref_is_dead - Check if the rcuref has been already marked dead
49+
* @ref: Pointer to the reference count
50+
*
51+
* Return: True if the object has been marked DEAD. This signals that a previous
52+
* invocation of rcuref_put() returned true on this reference counter meaning
53+
* the protected object can safely be scheduled for deconstruction.
54+
* Otherwise, returns false.
55+
*/
56+
static inline bool rcuref_is_dead(rcuref_t *ref)
57+
{
58+
unsigned int c = atomic_read(&ref->refcnt);
59+
60+
return (c >= RCUREF_RELEASED) && (c < RCUREF_NOREF);
61+
}
62+
4363
extern __must_check bool rcuref_get_slowpath(rcuref_t *ref);
4464

4565
/**

include/linux/restart_block.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ struct restart_block {
2626
unsigned long arch_data;
2727
long (*fn)(struct restart_block *);
2828
union {
29-
/* For futex_wait and futex_wait_requeue_pi */
29+
/* For futex_wait() */
3030
struct {
3131
u32 __user *uaddr;
3232
u32 val;

include/linux/vmalloc.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,13 @@ void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_m
169169
int node, const void *caller) __alloc_size(1);
170170
#define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__))
171171

172-
void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
173-
#define vmalloc_huge(...) alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__))
172+
void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node) __alloc_size(1);
173+
#define vmalloc_huge_node(...) alloc_hooks(vmalloc_huge_node_noprof(__VA_ARGS__))
174+
175+
static inline void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
176+
{
177+
return vmalloc_huge_node(size, gfp_mask, NUMA_NO_NODE);
178+
}
174179

175180
extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
176181
#define __vmalloc_array(...) alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__))

include/uapi/linux/futex.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
#define FUTEX2_SIZE_U32 0x02
6464
#define FUTEX2_SIZE_U64 0x03
6565
#define FUTEX2_NUMA 0x04
66-
/* 0x08 */
66+
#define FUTEX2_MPOL 0x08
6767
/* 0x10 */
6868
/* 0x20 */
6969
/* 0x40 */
@@ -74,6 +74,13 @@
7474
/* do not use */
7575
#define FUTEX_32 FUTEX2_SIZE_U32 /* historical accident :-( */
7676

77+
/*
78+
* When FUTEX2_NUMA doubles the futex word, the second word is a node value.
79+
* The special value -1 indicates no-node. This is the same value as
80+
* NUMA_NO_NODE, except that value is not ABI, this is.
81+
*/
82+
#define FUTEX_NO_NODE (-1)
83+
7784
/*
7885
* Max numbers of elements in a futex_waitv array
7986
*/

include/uapi/linux/prctl.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,4 +364,11 @@ struct prctl_mm_map {
364364
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
365365
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
366366

367+
/* FUTEX hash management */
368+
#define PR_FUTEX_HASH 78
369+
# define PR_FUTEX_HASH_SET_SLOTS 1
370+
# define FH_FLAG_IMMUTABLE (1ULL << 0)
371+
# define PR_FUTEX_HASH_GET_SLOTS 2
372+
# define PR_FUTEX_HASH_GET_IMMUTABLE 3
373+
367374
#endif /* _LINUX_PRCTL_H */

init/Kconfig

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1687,6 +1687,16 @@ config FUTEX_PI
16871687
depends on FUTEX && RT_MUTEXES
16881688
default y
16891689

1690+
config FUTEX_PRIVATE_HASH
1691+
bool
1692+
depends on FUTEX && !BASE_SMALL && MMU
1693+
default y
1694+
1695+
config FUTEX_MPOL
1696+
bool
1697+
depends on FUTEX && NUMA
1698+
default y
1699+
16901700
config EPOLL
16911701
bool "Enable eventpoll support" if EXPERT
16921702
default y

io_uring/futex.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,6 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
273273
struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
274274
struct io_ring_ctx *ctx = req->ctx;
275275
struct io_futex_data *ifd = NULL;
276-
struct futex_hash_bucket *hb;
277276
int ret;
278277

279278
if (!iof->futex_mask) {
@@ -295,12 +294,11 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
295294
ifd->req = req;
296295

297296
ret = futex_wait_setup(iof->uaddr, iof->futex_val, iof->futex_flags,
298-
&ifd->q, &hb);
297+
&ifd->q, NULL, NULL);
299298
if (!ret) {
300299
hlist_add_head(&req->hash_node, &ctx->futex_list);
301300
io_ring_submit_unlock(ctx, issue_flags);
302301

303-
futex_queue(&ifd->q, hb, NULL);
304302
return IOU_ISSUE_SKIP_COMPLETE;
305303
}
306304

0 commit comments

Comments
 (0)