Skip to content

Commit 9140f57

Browse files
author
Peter Zijlstra
committed
futex,selftests: Add another FUTEX2_NUMA selftest
Implement a simple NUMA aware spinlock for testing and howto purposes. Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
1 parent 3163369 commit 9140f57

File tree

2 files changed

+264
-1
lines changed

2 files changed

+264
-1
lines changed

tools/testing/selftests/futex/functional/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ TEST_GEN_PROGS := \
1919
futex_requeue \
2020
futex_priv_hash \
2121
futex_numa_mpol \
22-
futex_waitv
22+
futex_waitv \
23+
futex_numa
2324

2425
TEST_PROGS := run.sh
2526

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include <pthread.h>
4+
#include <sys/shm.h>
5+
#include <sys/mman.h>
6+
#include <fcntl.h>
7+
#include <stdbool.h>
8+
#include <time.h>
9+
#include <assert.h>
10+
#include "logging.h"
11+
#include "futextest.h"
12+
#include "futex2test.h"
13+
14+
/* Short fixed-width integer aliases used throughout this test. */
typedef u_int32_t u32;
15+
typedef int32_t s32;
16+
typedef u_int64_t u64;
17+
18+
/*
 * Flags handed to every futex2_wait()/futex2_wake() call in this file.
 * The -N command line option ORs FUTEX2_NUMA into this value.
 */
static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
19+
/*
 * Value stored into the lock's node field when a completely idle lock
 * (val == 0) is taken; can be overridden with -N<node>.
 */
static int fnode = FUTEX_NO_NODE;
20+
21+
/*
 * Fairly stupid test-and-set lock with a waiter flag.
 *
 * The two bits below live in the 32-bit futex value of struct futex_numa_32.
 */
22+
23+
#define N_LOCK 0x0000001 /* set while some thread holds the lock */
24+
#define N_WAITERS 0x0001000 /* set by a locker that found N_LOCK already set */
25+
26+
/*
 * Lock word: a 32-bit futex value (val) followed by a 32-bit node field
 * (node), overlaid with a single u64 (full) so that futex_numa_32_lock() can
 * load and compare-exchange both fields as one unit.
 */
struct futex_numa_32 {
27+
union {
28+
u64 full;
29+
struct {
30+
u32 val;
31+
u32 node;
32+
};
33+
};
34+
};
35+
36+
void futex_numa_32_lock(struct futex_numa_32 *lock)
37+
{
38+
for (;;) {
39+
struct futex_numa_32 new, old = {
40+
.full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED),
41+
};
42+
43+
for (;;) {
44+
new = old;
45+
if (old.val == 0) {
46+
/* no waiter, no lock -> first lock, set no-node */
47+
new.node = fnode;
48+
}
49+
if (old.val & N_LOCK) {
50+
/* contention, set waiter */
51+
new.val |= N_WAITERS;
52+
}
53+
new.val |= N_LOCK;
54+
55+
/* nothing changed, ready to block */
56+
if (old.full == new.full)
57+
break;
58+
59+
/*
60+
* Use u64 cmpxchg to set the futex value and node in a
61+
* consistent manner.
62+
*/
63+
if (__atomic_compare_exchange_n(&lock->full,
64+
&old.full, new.full,
65+
/* .weak */ false,
66+
__ATOMIC_ACQUIRE,
67+
__ATOMIC_RELAXED)) {
68+
69+
/* if we just set N_LOCK, we own it */
70+
if (!(old.val & N_LOCK))
71+
return;
72+
73+
/* go block */
74+
break;
75+
}
76+
}
77+
78+
futex2_wait(lock, new.val, fflags, NULL, 0);
79+
}
80+
}
81+
82+
void futex_numa_32_unlock(struct futex_numa_32 *lock)
83+
{
84+
u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE);
85+
assert((s32)val >= 0);
86+
if (val & N_WAITERS) {
87+
int woken = futex2_wake(lock, 1, fflags);
88+
assert(val == N_WAITERS);
89+
if (!woken) {
90+
__atomic_compare_exchange_n(&lock->val, &val, 0U,
91+
false, __ATOMIC_RELAXED,
92+
__ATOMIC_RELAXED);
93+
}
94+
}
95+
}
96+
97+
/*
 * Sleep length in nanoseconds used by threadfn(), both while holding the
 * lock and between acquisitions; set with the -n option.
 */
static long nanos = 50000;
98+
99+
/* Per-thread state handed to threadfn() and contendfn(). */
struct thread_args {
100+
pthread_t tid; /* filled in by pthread_create() in main() */
101+
volatile int * done; /* loop-exit flag polled by the thread */
102+
struct futex_numa_32 *lock; /* lock the thread operates on */
103+
int val; /* per-thread iteration counter, summed by main() */
104+
int *val1, *val2; /* counters bumped by threadfn() while holding the lock */
105+
int node; /* last lock->node value seen by threadfn() */
106+
};
107+
108+
static void *threadfn(void *_arg)
109+
{
110+
struct thread_args *args = _arg;
111+
struct timespec ts = {
112+
.tv_nsec = nanos,
113+
};
114+
int node;
115+
116+
while (!*args->done) {
117+
118+
futex_numa_32_lock(args->lock);
119+
args->val++;
120+
121+
assert(*args->val1 == *args->val2);
122+
(*args->val1)++;
123+
nanosleep(&ts, NULL);
124+
(*args->val2)++;
125+
126+
node = args->lock->node;
127+
futex_numa_32_unlock(args->lock);
128+
129+
if (node != args->node) {
130+
args->node = node;
131+
printf("node: %d\n", node);
132+
}
133+
134+
nanosleep(&ts, NULL);
135+
}
136+
137+
return NULL;
138+
}
139+
140+
static void *contendfn(void *_arg)
141+
{
142+
struct thread_args *args = _arg;
143+
144+
while (!*args->done) {
145+
/*
146+
* futex2_wait() will take hb-lock, verify *var == val and
147+
* queue/abort. By knowingly setting val 'wrong' this will
148+
* abort and thereby generate hb-lock contention.
149+
*/
150+
futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0);
151+
args->val++;
152+
}
153+
154+
return NULL;
155+
}
156+
157+
/* Set by main() after the test duration to make all threads exit. */
static volatile int done = 0;
158+
/* The single lock shared by every thread in the test. */
static struct futex_numa_32 lock = { .val = 0, };
159+
/* Counters incremented by threadfn() while it holds the lock. */
static int val1, val2;
160+
161+
int main(int argc, char *argv[])
162+
{
163+
struct thread_args *tas[512], *cas[512];
164+
int c, t, threads = 2, contenders = 0;
165+
int sleeps = 10;
166+
int total = 0;
167+
168+
while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) {
169+
switch (c) {
170+
case 'c':
171+
contenders = atoi(optarg);
172+
break;
173+
case 't':
174+
threads = atoi(optarg);
175+
break;
176+
case 's':
177+
sleeps = atoi(optarg);
178+
break;
179+
case 'n':
180+
nanos = atoi(optarg);
181+
break;
182+
case 'N':
183+
fflags |= FUTEX2_NUMA;
184+
if (optarg)
185+
fnode = atoi(optarg);
186+
break;
187+
default:
188+
exit(1);
189+
break;
190+
}
191+
}
192+
193+
for (t = 0; t < contenders; t++) {
194+
struct thread_args *args = calloc(1, sizeof(*args));
195+
if (!args) {
196+
perror("thread_args");
197+
exit(-1);
198+
}
199+
200+
args->done = &done;
201+
args->lock = &lock;
202+
args->val1 = &val1;
203+
args->val2 = &val2;
204+
args->node = -1;
205+
206+
if (pthread_create(&args->tid, NULL, contendfn, args)) {
207+
perror("pthread_create");
208+
exit(-1);
209+
}
210+
211+
cas[t] = args;
212+
}
213+
214+
for (t = 0; t < threads; t++) {
215+
struct thread_args *args = calloc(1, sizeof(*args));
216+
if (!args) {
217+
perror("thread_args");
218+
exit(-1);
219+
}
220+
221+
args->done = &done;
222+
args->lock = &lock;
223+
args->val1 = &val1;
224+
args->val2 = &val2;
225+
args->node = -1;
226+
227+
if (pthread_create(&args->tid, NULL, threadfn, args)) {
228+
perror("pthread_create");
229+
exit(-1);
230+
}
231+
232+
tas[t] = args;
233+
}
234+
235+
sleep(sleeps);
236+
237+
done = true;
238+
239+
for (t = 0; t < threads; t++) {
240+
struct thread_args *args = tas[t];
241+
242+
pthread_join(args->tid, NULL);
243+
total += args->val;
244+
// printf("tval: %d\n", args->val);
245+
}
246+
printf("total: %d\n", total);
247+
248+
if (contenders) {
249+
total = 0;
250+
for (t = 0; t < contenders; t++) {
251+
struct thread_args *args = cas[t];
252+
253+
pthread_join(args->tid, NULL);
254+
total += args->val;
255+
// printf("tval: %d\n", args->val);
256+
}
257+
printf("contenders: %d\n", total);
258+
}
259+
260+
return 0;
261+
}
262+

0 commit comments

Comments
 (0)