Skip to content

Commit 75dd8ef

Browse files
committed
refperf: Hoist function-pointer calls out of the loop
Current runs show PREEMPT=n rcu_read_lock()/rcu_read_unlock() pairs consuming between 20 and 30 nanoseconds, when in fact the actual value is zero, give or take the barrier() asm's effect on compiler optimizations. The additional overhead is caused by function calls through pointers (especially in these days of Spectre mitigations) and perhaps also needless argument passing, a non-const loop limit, and an upcounting loop.

This commit therefore combines the ->readlock() and ->readunlock() function pointers into a single ->readsection() function pointer that takes the loop count as a const parameter and keeps any data passed from the read-lock to the read-unlock internal to this new function.

These changes reduce the measured overhead of the aforementioned PREEMPT=n rcu_read_lock()/rcu_read_unlock() pairs from between 20 and 30 nanoseconds to somewhere south of 500 picoseconds.

Cc: Joel Fernandes (Google) <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
1 parent 777a54c commit 75dd8ef

File tree

1 file changed

+38
-54
lines changed

1 file changed

+38
-54
lines changed

kernel/rcu/refperf.c

Lines changed: 38 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -108,23 +108,20 @@ static int exp_idx;
108108
struct ref_perf_ops {
109109
void (*init)(void);
110110
void (*cleanup)(void);
111-
int (*readlock)(void);
112-
void (*readunlock)(int idx);
111+
void (*readsection)(const int nloops);
113112
const char *name;
114113
};
115114

116115
static struct ref_perf_ops *cur_ops;
117116

118-
// Definitions for RCU ref perf testing.
119-
static int ref_rcu_read_lock(void) __acquires(RCU)
117+
static void ref_rcu_read_section(const int nloops)
120118
{
121-
rcu_read_lock();
122-
return 0;
123-
}
119+
int i;
124120

125-
static void ref_rcu_read_unlock(int idx) __releases(RCU)
126-
{
127-
rcu_read_unlock();
121+
for (i = nloops; i >= 0; i--) {
122+
rcu_read_lock();
123+
rcu_read_unlock();
124+
}
128125
}
129126

130127
static void rcu_sync_perf_init(void)
@@ -133,8 +130,7 @@ static void rcu_sync_perf_init(void)
133130

134131
static struct ref_perf_ops rcu_ops = {
135132
.init = rcu_sync_perf_init,
136-
.readlock = ref_rcu_read_lock,
137-
.readunlock = ref_rcu_read_unlock,
133+
.readsection = ref_rcu_read_section,
138134
.name = "rcu"
139135
};
140136

@@ -143,42 +139,39 @@ static struct ref_perf_ops rcu_ops = {
143139
DEFINE_STATIC_SRCU(srcu_refctl_perf);
144140
static struct srcu_struct *srcu_ctlp = &srcu_refctl_perf;
145141

146-
static int srcu_ref_perf_read_lock(void) __acquires(srcu_ctlp)
142+
static void srcu_ref_perf_read_section(int nloops)
147143
{
148-
return srcu_read_lock(srcu_ctlp);
149-
}
144+
int i;
145+
int idx;
150146

151-
static void srcu_ref_perf_read_unlock(int idx) __releases(srcu_ctlp)
152-
{
153-
srcu_read_unlock(srcu_ctlp, idx);
147+
for (i = nloops; i >= 0; i--) {
148+
idx = srcu_read_lock(srcu_ctlp);
149+
srcu_read_unlock(srcu_ctlp, idx);
150+
}
154151
}
155152

156153
static struct ref_perf_ops srcu_ops = {
157154
.init = rcu_sync_perf_init,
158-
.readlock = srcu_ref_perf_read_lock,
159-
.readunlock = srcu_ref_perf_read_unlock,
155+
.readsection = srcu_ref_perf_read_section,
160156
.name = "srcu"
161157
};
162158

163159
// Definitions for reference count
164160
static atomic_t refcnt;
165161

166-
static int srcu_ref_perf_refcnt_lock(void)
162+
static void ref_perf_refcnt_section(const int nloops)
167163
{
168-
atomic_inc(&refcnt);
169-
return 0;
170-
}
164+
int i;
171165

172-
static void srcu_ref_perf_refcnt_unlock(int idx) __releases(srcu_ctlp)
173-
{
174-
atomic_dec(&refcnt);
175-
srcu_read_unlock(srcu_ctlp, idx);
166+
for (i = nloops; i >= 0; i--) {
167+
atomic_inc(&refcnt);
168+
atomic_dec(&refcnt);
169+
}
176170
}
177171

178172
static struct ref_perf_ops refcnt_ops = {
179173
.init = rcu_sync_perf_init,
180-
.readlock = srcu_ref_perf_refcnt_lock,
181-
.readunlock = srcu_ref_perf_refcnt_unlock,
174+
.readsection = ref_perf_refcnt_section,
182175
.name = "refcnt"
183176
};
184177

@@ -190,21 +183,19 @@ static void ref_perf_rwlock_init(void)
190183
rwlock_init(&test_rwlock);
191184
}
192185

193-
static int ref_perf_rwlock_lock(void)
186+
static void ref_perf_rwlock_section(const int nloops)
194187
{
195-
read_lock(&test_rwlock);
196-
return 0;
197-
}
188+
int i;
198189

199-
static void ref_perf_rwlock_unlock(int idx)
200-
{
201-
read_unlock(&test_rwlock);
190+
for (i = nloops; i >= 0; i--) {
191+
read_lock(&test_rwlock);
192+
read_unlock(&test_rwlock);
193+
}
202194
}
203195

204196
static struct ref_perf_ops rwlock_ops = {
205197
.init = ref_perf_rwlock_init,
206-
.readlock = ref_perf_rwlock_lock,
207-
.readunlock = ref_perf_rwlock_unlock,
198+
.readsection = ref_perf_rwlock_section,
208199
.name = "rwlock"
209200
};
210201

@@ -216,21 +207,19 @@ static void ref_perf_rwsem_init(void)
216207
init_rwsem(&test_rwsem);
217208
}
218209

219-
static int ref_perf_rwsem_lock(void)
210+
static void ref_perf_rwsem_section(const int nloops)
220211
{
221-
down_read(&test_rwsem);
222-
return 0;
223-
}
212+
int i;
224213

225-
static void ref_perf_rwsem_unlock(int idx)
226-
{
227-
up_read(&test_rwsem);
214+
for (i = nloops; i >= 0; i--) {
215+
down_read(&test_rwsem);
216+
up_read(&test_rwsem);
217+
}
228218
}
229219

230220
static struct ref_perf_ops rwsem_ops = {
231221
.init = ref_perf_rwsem_init,
232-
.readlock = ref_perf_rwsem_lock,
233-
.readunlock = ref_perf_rwsem_unlock,
222+
.readsection = ref_perf_rwsem_section,
234223
.name = "rwsem"
235224
};
236225

@@ -242,8 +231,6 @@ ref_perf_reader(void *arg)
242231
unsigned long flags;
243232
long me = (long)arg;
244233
struct reader_task *rt = &(reader_tasks[me]);
245-
unsigned long spincnt;
246-
int idx;
247234
u64 start;
248235
s64 duration;
249236

@@ -275,10 +262,7 @@ ref_perf_reader(void *arg)
275262

276263
VERBOSE_PERFOUT("ref_perf_reader %ld: experiment %d started", me, exp_idx);
277264

278-
for (spincnt = 0; spincnt < loops; spincnt++) {
279-
idx = cur_ops->readlock();
280-
cur_ops->readunlock(idx);
281-
}
265+
cur_ops->readsection(loops);
282266

283267
duration = ktime_get_mono_fast_ns() - start;
284268
local_irq_restore(flags);

0 commit comments

Comments
 (0)