Skip to content

Commit c042c50

Browse files
author
Peter Zijlstra
committed
futex: Implement FUTEX2_MPOL
Extend the futex2 interface to be aware of mempolicy.

When FUTEX2_MPOL is specified and there is an MPOL_PREFERRED policy or a home_node specified covering the futex address, use the hash-map of that node. Notably, in this case the futex will go to the global node hashtable, even if it is a PRIVATE futex.

When FUTEX2_NUMA|FUTEX2_MPOL is specified and the user-specified node value is FUTEX_NO_NODE, the MPOL lookup (as described above) will be tried first before reverting to setting node to the local node.

[bigeasy: add CONFIG_FUTEX_MPOL, add MPOL to FUTEX2_VALID_MASK, write the node only to user if FUTEX_NO_NODE was supplied]

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent cec199c commit c042c50

File tree

5 files changed

+115
-18
lines changed

5 files changed

+115
-18
lines changed

include/linux/mmap_lock.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <linux/rwsem.h>
88
#include <linux/tracepoint-defs.h>
99
#include <linux/types.h>
10+
#include <linux/cleanup.h>
1011

1112
#define MMAP_LOCK_INITIALIZER(name) \
1213
.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
@@ -211,6 +212,9 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
211212
up_read(&mm->mmap_lock);
212213
}
213214

215+
/*
 * Scope-based guard class "mmap_read_lock" for a struct mm_struct pointer.
 * Constructing the guard calls mmap_read_lock() on the guarded mm; the
 * paired cleanup expression is mmap_read_unlock() on the same mm.
 * Intended use: guard(mmap_read_lock)(mm);
 * (Per the <linux/cleanup.h> guard convention the unlock is expected to run
 * when the guard variable leaves scope.)
 */
DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
216+
mmap_read_lock(_T), mmap_read_unlock(_T))
217+
214218
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
215219
{
216220
__mmap_lock_trace_released(mm, false);

include/uapi/linux/futex.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
#define FUTEX2_SIZE_U32 0x02
6464
#define FUTEX2_SIZE_U64 0x03
6565
#define FUTEX2_NUMA 0x04
66-
/* 0x08 */
66+
#define FUTEX2_MPOL 0x08
6767
/* 0x10 */
6868
/* 0x20 */
6969
/* 0x40 */

init/Kconfig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1704,6 +1704,11 @@ config FUTEX_PRIVATE_HASH
17041704
depends on FUTEX && !BASE_SMALL && MMU
17051705
default y
17061706

1707+
config FUTEX_MPOL
1708+
bool
1709+
depends on FUTEX && NUMA
1710+
default y
1711+
17071712
config EPOLL
17081713
bool "Enable eventpoll support" if EXPERT
17091714
default y

kernel/futex/core.c

Lines changed: 100 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#include <linux/slab.h>
4444
#include <linux/prctl.h>
4545
#include <linux/rcuref.h>
46+
#include <linux/mempolicy.h>
47+
#include <linux/mmap_lock.h>
4648

4749
#include "futex.h"
4850
#include "../locking/rtmutex_common.h"
@@ -328,6 +330,75 @@ struct futex_hash_bucket *futex_hash(union futex_key *key)
328330

329331
#endif /* CONFIG_FUTEX_PRIVATE_HASH */
330332

333+
#ifdef CONFIG_FUTEX_MPOL
334+
335+
/**
 * __futex_key_to_node - Derive a NUMA node from the mempolicy covering @addr
 * @mm:   Address space in which @addr is looked up
 * @addr: User address of the futex
 *
 * Looks up the VMA containing @addr and inspects its memory policy.
 * This helper takes no lock itself; the caller must provide whatever
 * protection makes the VMA lookup meaningful (mmap read lock, or an
 * mmap_lock speculation window that is validated afterwards).
 *
 * Return: the node selected by the policy, or FUTEX_NO_NODE when no VMA
 * contains @addr, the VMA has no policy, or the policy mode does not name
 * a single node.
 */
static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr)
336+
{
337+
struct vm_area_struct *vma = vma_lookup(mm, addr);
338+
struct mempolicy *mpol;
339+
/* Default answer for every path that does not pick a node below. */
int node = FUTEX_NO_NODE;
340+
341+
/* @addr is not covered by any VMA. */
if (!vma)
342+
return FUTEX_NO_NODE;
343+
344+
mpol = vma_policy(vma);
345+
/* The VMA carries no policy of its own. */
if (!mpol)
346+
return FUTEX_NO_NODE;
347+
348+
switch (mpol->mode) {
349+
case MPOL_PREFERRED:
350+
/* Use the first node set in the policy's node mask. */
node = first_node(mpol->nodes);
351+
break;
352+
case MPOL_PREFERRED_MANY:
353+
case MPOL_BIND:
354+
/* Multi-node modes only yield a node when a home node was set. */
if (mpol->home_node != NUMA_NO_NODE)
355+
node = mpol->home_node;
356+
break;
357+
default:
358+
/* Any other mode: keep FUTEX_NO_NODE. */
break;
359+
}
360+
361+
return node;
362+
}
363+
364+
/**
 * futex_key_to_node_opt - Optimistic, lockless variant of the node lookup
 * @mm:   Address space in which @addr is looked up
 * @addr: User address of the futex
 *
 * Runs __futex_key_to_node() inside an RCU read-side section and an
 * mmap_lock speculation window instead of taking the mmap read lock.
 * The order begin -> lookup -> retry-check is essential: the result is
 * only returned if the retry check passes after the lookup.
 *
 * Return: the value of __futex_key_to_node() (a node or FUTEX_NO_NODE) if
 * the speculation held, -EBUSY if speculation could not be started, or
 * -EAGAIN if the retry check failed and the lookup result was discarded.
 *
 * NOTE(review): the VMA's mempolicy is dereferenced here with only RCU
 * held - confirm that mempolicy objects are freed in an RCU-safe way.
 */
static int futex_key_to_node_opt(struct mm_struct *mm, unsigned long addr)
365+
{
366+
int seq, node;
367+
368+
/* RCU read-side section covering the rest of this function. */
guard(rcu)();
369+
370+
/* Could not open a speculation window; the caller must fall back. */
if (!mmap_lock_speculate_try_begin(mm, &seq))
371+
return -EBUSY;
372+
373+
node = __futex_key_to_node(mm, addr);
374+
375+
/* Speculation was invalidated; do not trust @node. */
if (mmap_lock_speculate_retry(mm, seq))
376+
return -EAGAIN;
377+
378+
return node;
379+
}
380+
381+
/**
 * futex_mpol - Find the mempolicy-selected node for a futex address
 * @mm:   Address space in which @addr is looked up
 * @addr: User address of the futex
 *
 * Tries the lockless lookup first and only falls back to a lookup under
 * the mmap read lock when the lockless attempt reported a failure.
 *
 * Return: the node chosen by the policy covering @addr, or FUTEX_NO_NODE
 * if the policy does not provide one.
 */
static int futex_mpol(struct mm_struct *mm, unsigned long addr)
382+
{
383+
int node;
384+
385+
node = futex_key_to_node_opt(mm, addr);
386+
/*
 * A node or FUTEX_NO_NODE is a final answer. The failure codes of the
 * lockless path (-EBUSY, -EAGAIN) are expected to compare below
 * FUTEX_NO_NODE (presumably -1 - confirm in the uapi header).
 */
if (node >= FUTEX_NO_NODE)
387+
return node;
388+
389+
/* Slow path: repeat the lookup with the mmap read lock held via the guard. */
guard(mmap_read_lock)(mm);
390+
return __futex_key_to_node(mm, addr);
391+
}
392+
393+
#else /* !CONFIG_FUTEX_MPOL */
394+
395+
/*
 * Stub used when CONFIG_FUTEX_MPOL is not set: no mempolicy lookup is
 * performed, @mm and @addr are ignored and FUTEX_NO_NODE is always returned.
 */
static int futex_mpol(struct mm_struct *mm, unsigned long addr)
396+
{
397+
return FUTEX_NO_NODE;
398+
}
399+
400+
#endif /* CONFIG_FUTEX_MPOL */
401+
331402
/**
332403
* __futex_hash - Return the hash bucket
333404
* @key: Pointer to the futex key for which the hash is calculated
@@ -342,18 +413,20 @@ struct futex_hash_bucket *futex_hash(union futex_key *key)
342413
static struct futex_hash_bucket *
343414
__futex_hash(union futex_key *key, struct futex_private_hash *fph)
344415
{
345-
struct futex_hash_bucket *hb;
416+
int node = key->both.node;
346417
u32 hash;
347-
int node;
348418

349-
hb = __futex_hash_private(key, fph);
350-
if (hb)
351-
return hb;
419+
if (node == FUTEX_NO_NODE) {
420+
struct futex_hash_bucket *hb;
421+
422+
hb = __futex_hash_private(key, fph);
423+
if (hb)
424+
return hb;
425+
}
352426

353427
hash = jhash2((u32 *)key,
354428
offsetof(typeof(*key), both.offset) / sizeof(u32),
355429
key->both.offset);
356-
node = key->both.node;
357430

358431
if (node == FUTEX_NO_NODE) {
359432
/*
@@ -480,6 +553,7 @@ int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
480553
struct folio *folio;
481554
struct address_space *mapping;
482555
int node, err, size, ro = 0;
556+
bool node_updated = false;
483557
bool fshared;
484558

485559
fshared = flags & FLAGS_SHARED;
@@ -501,27 +575,37 @@ int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
501575
if (unlikely(should_fail_futex(fshared)))
502576
return -EFAULT;
503577

578+
node = FUTEX_NO_NODE;
579+
504580
if (flags & FLAGS_NUMA) {
505581
u32 __user *naddr = (void *)uaddr + size / 2;
506582

507583
if (futex_get_value(&node, naddr))
508584
return -EFAULT;
509585

510-
if (node == FUTEX_NO_NODE) {
511-
node = numa_node_id();
512-
if (futex_put_value(node, naddr))
513-
return -EFAULT;
514-
515-
} else if (node >= MAX_NUMNODES || !node_possible(node)) {
586+
if (node != FUTEX_NO_NODE &&
587+
(node >= MAX_NUMNODES || !node_possible(node)))
516588
return -EINVAL;
517-
}
589+
}
518590

519-
key->both.node = node;
591+
if (node == FUTEX_NO_NODE && (flags & FLAGS_MPOL)) {
592+
node = futex_mpol(mm, address);
593+
node_updated = true;
594+
}
520595

521-
} else {
522-
key->both.node = FUTEX_NO_NODE;
596+
if (flags & FLAGS_NUMA) {
597+
u32 __user *naddr = (void *)uaddr + size / 2;
598+
599+
if (node == FUTEX_NO_NODE) {
600+
node = numa_node_id();
601+
node_updated = true;
602+
}
603+
if (node_updated && futex_put_value(node, naddr))
604+
return -EFAULT;
523605
}
524606

607+
key->both.node = node;
608+
525609
/*
526610
* PROCESS_PRIVATE futexes are fast.
527611
* As the mm cannot disappear under us and the 'key' only needs

kernel/futex/futex.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#define FLAGS_HAS_TIMEOUT 0x0040
4040
#define FLAGS_NUMA 0x0080
4141
#define FLAGS_STRICT 0x0100
42+
#define FLAGS_MPOL 0x0200
4243

4344
/* FUTEX_ to FLAGS_ */
4445
static inline unsigned int futex_to_flags(unsigned int op)
@@ -54,7 +55,7 @@ static inline unsigned int futex_to_flags(unsigned int op)
5455
return flags;
5556
}
5657

57-
#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_NUMA | FUTEX2_PRIVATE)
58+
#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_NUMA | FUTEX2_MPOL | FUTEX2_PRIVATE)
5859

5960
/* FUTEX2_ to FLAGS_ */
6061
static inline unsigned int futex2_to_flags(unsigned int flags2)
@@ -67,6 +68,9 @@ static inline unsigned int futex2_to_flags(unsigned int flags2)
6768
if (flags2 & FUTEX2_NUMA)
6869
flags |= FLAGS_NUMA;
6970

71+
if (flags2 & FUTEX2_MPOL)
72+
flags |= FLAGS_MPOL;
73+
7074
return flags;
7175
}
7276

0 commit comments

Comments
 (0)