Skip to content

Commit 580724f

Browse files
FlyGoat authored and tsbogend committed
MIPS: sync-r4k: Rework based on x86 tsc_sync
The original sync-r4k did a good job of reducing jitter by determining the "next time value", but it has a limitation: when synchronization is performed too many times, due to high core count or CPU hotplug, the timewarp on CPU0 becomes unacceptable. Rework the mechanism based on the latest x86 tsc_sync. (It seems the original implementation was based on tsc_sync as it was at that time, so this is just a refresh.) This improves overall performance. Tested on Loongson64, Boston, QEMU. Signed-off-by: Jiaxun Yang <[email protected]> Signed-off-by: Thomas Bogendoerfer <[email protected]>
1 parent 7464c07 commit 580724f

File tree

3 files changed

+202
-86
lines changed

3 files changed

+202
-86
lines changed

arch/mips/include/asm/r4k-timer.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,10 @@
1212

1313
#ifdef CONFIG_SYNC_R4K
1414

15-
extern void synchronise_count_master(int cpu);
1615
extern void synchronise_count_slave(int cpu);
1716

1817
#else
1918

20-
static inline void synchronise_count_master(int cpu)
21-
{
22-
}
23-
2419
static inline void synchronise_count_slave(int cpu)
2520
{
2621
}

arch/mips/kernel/smp.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -462,8 +462,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
462462
return -EIO;
463463
}
464464

465-
synchronise_count_master(cpu);
466-
467465
/* Wait for CPU to finish startup & mark itself online before return */
468466
wait_for_completion(&cpu_running);
469467
return 0;

arch/mips/kernel/sync-r4k.c

Lines changed: 202 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -2,121 +2,244 @@
22
/*
33
* Count register synchronisation.
44
*
5-
* All CPUs will have their count registers synchronised to the CPU0 next time
6-
* value. This can cause a small timewarp for CPU0. All other CPU's should
7-
* not have done anything significant (but they may have had interrupts
8-
* enabled briefly - prom_smp_finish() should not be responsible for enabling
9-
* interrupts...)
5+
* Derived from arch/x86/kernel/tsc_sync.c
6+
* Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
107
*/
118

129
#include <linux/kernel.h>
1310
#include <linux/irqflags.h>
1411
#include <linux/cpumask.h>
12+
#include <linux/atomic.h>
13+
#include <linux/nmi.h>
14+
#include <linux/smp.h>
15+
#include <linux/spinlock.h>
1516

1617
#include <asm/r4k-timer.h>
17-
#include <linux/atomic.h>
18-
#include <asm/barrier.h>
1918
#include <asm/mipsregs.h>
19+
#include <asm/time.h>
2020

21-
static unsigned int initcount = 0;
22-
static atomic_t count_count_start = ATOMIC_INIT(0);
23-
static atomic_t count_count_stop = ATOMIC_INIT(0);
24-
25-
#define COUNTON 100
26-
#define NR_LOOPS 3
27-
28-
void synchronise_count_master(int cpu)
29-
{
30-
int i;
31-
unsigned long flags;
32-
33-
pr_info("Synchronize counters for CPU %u: ", cpu);
21+
#define COUNTON 100
22+
#define NR_LOOPS 3
23+
#define LOOP_TIMEOUT 20
3424

35-
local_irq_save(flags);
25+
/*
26+
* Entry/exit counters that make sure that both CPUs
27+
* run the measurement code at once:
28+
*/
29+
static atomic_t start_count;
30+
static atomic_t stop_count;
31+
static atomic_t test_runs;
3632

37-
/*
38-
* We loop a few times to get a primed instruction cache,
39-
* then the last pass is more or less synchronised and
40-
* the master and slaves each set their cycle counters to a known
41-
* value all at once. This reduces the chance of having random offsets
42-
* between the processors, and guarantees that the maximum
43-
* delay between the cycle counters is never bigger than
44-
* the latency of information-passing (cachelines) between
45-
* two CPUs.
46-
*/
33+
/*
34+
* We use a raw spinlock in this exceptional case, because
35+
* we want to have the fastest, inlined, non-debug version
36+
* of a critical section, to be able to prove counter time-warps:
37+
*/
38+
static arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
4739

48-
for (i = 0; i < NR_LOOPS; i++) {
49-
/* slaves loop on '!= 2' */
50-
while (atomic_read(&count_count_start) != 1)
51-
mb();
52-
atomic_set(&count_count_stop, 0);
53-
smp_wmb();
40+
static uint32_t last_counter;
41+
static uint32_t max_warp;
42+
static int nr_warps;
43+
static int random_warps;
5444

55-
/* Let the slave writes its count register */
56-
atomic_inc(&count_count_start);
45+
/*
46+
* Counter warp measurement loop running on both CPUs.
47+
*/
48+
static uint32_t check_counter_warp(void)
49+
{
50+
uint32_t start, now, prev, end, cur_max_warp = 0;
51+
int i, cur_warps = 0;
5752

58-
/* Count will be initialised to current timer */
59-
if (i == 1)
60-
initcount = read_c0_count();
53+
start = read_c0_count();
54+
end = start + (uint32_t) mips_hpt_frequency / 1000 * LOOP_TIMEOUT;
6155

56+
for (i = 0; ; i++) {
6257
/*
63-
* Everyone initialises count in the last loop:
58+
* We take the global lock, measure counter, save the
59+
* previous counter that was measured (possibly on
60+
* another CPU) and update the previous counter timestamp.
6461
*/
65-
if (i == NR_LOOPS-1)
66-
write_c0_count(initcount);
62+
arch_spin_lock(&sync_lock);
63+
prev = last_counter;
64+
now = read_c0_count();
65+
last_counter = now;
66+
arch_spin_unlock(&sync_lock);
6767

6868
/*
69-
* Wait for slave to leave the synchronization point:
69+
* Be nice every now and then (and also check whether
70+
* measurement is done [we also insert a 10 million
71+
loops safety exit, so we don't lock up in case the
72+
* counter is totally broken]):
7073
*/
71-
while (atomic_read(&count_count_stop) != 1)
72-
mb();
73-
atomic_set(&count_count_start, 0);
74-
smp_wmb();
75-
atomic_inc(&count_count_stop);
74+
if (unlikely(!(i & 7))) {
75+
if (now > end || i > 10000000)
76+
break;
77+
cpu_relax();
78+
touch_nmi_watchdog();
79+
}
80+
/*
81+
* Outside the critical section we can now see whether
82+
* we saw a time-warp of the counter going backwards:
83+
*/
84+
if (unlikely(prev > now)) {
85+
arch_spin_lock(&sync_lock);
86+
max_warp = max(max_warp, prev - now);
87+
cur_max_warp = max_warp;
88+
/*
89+
* Check whether this bounces back and forth. Only
90+
* one CPU should observe time going backwards.
91+
*/
92+
if (cur_warps != nr_warps)
93+
random_warps++;
94+
nr_warps++;
95+
cur_warps = nr_warps;
96+
arch_spin_unlock(&sync_lock);
97+
}
98+
}
99+
WARN(!(now-start),
100+
"Warning: zero counter calibration delta: %d [max: %d]\n",
101+
now-start, end-start);
102+
return cur_max_warp;
103+
}
104+
105+
/*
106+
* The freshly booted CPU initiates this via an async SMP function call.
107+
*/
108+
static void check_counter_sync_source(void *__cpu)
109+
{
110+
unsigned int cpu = (unsigned long)__cpu;
111+
int cpus = 2;
112+
113+
atomic_set(&test_runs, NR_LOOPS);
114+
retry:
115+
/* Wait for the target to start. */
116+
while (atomic_read(&start_count) != cpus - 1)
117+
cpu_relax();
118+
119+
/*
120+
* Trigger the target to continue into the measurement too:
121+
*/
122+
atomic_inc(&start_count);
123+
124+
check_counter_warp();
125+
126+
while (atomic_read(&stop_count) != cpus-1)
127+
cpu_relax();
128+
129+
/*
130+
* If the test was successful set the number of runs to zero and
131+
* stop. If not, decrement the number of runs and check if we can
132+
* retry. In case of random warps no retry is attempted.
133+
*/
134+
if (!nr_warps) {
135+
atomic_set(&test_runs, 0);
136+
137+
pr_info("Counter synchronization [CPU#%d -> CPU#%u]: passed\n",
138+
smp_processor_id(), cpu);
139+
} else if (atomic_dec_and_test(&test_runs) || random_warps) {
140+
/* Force it to 0 if random warps brought us here */
141+
atomic_set(&test_runs, 0);
142+
143+
pr_info("Counter synchronization [CPU#%d -> CPU#%u]:\n",
144+
smp_processor_id(), cpu);
145+
pr_info("Measured %d cycles counter warp between CPUs", max_warp);
146+
if (random_warps)
147+
pr_warn("Counter warped randomly between CPUs\n");
76148
}
77-
/* Arrange for an interrupt in a short while */
78-
write_c0_compare(read_c0_count() + COUNTON);
79149

80-
local_irq_restore(flags);
150+
/*
151+
* Reset it - just in case we boot another CPU later:
152+
*/
153+
atomic_set(&start_count, 0);
154+
random_warps = 0;
155+
nr_warps = 0;
156+
max_warp = 0;
157+
last_counter = 0;
158+
159+
/*
160+
* Let the target continue with the bootup:
161+
*/
162+
atomic_inc(&stop_count);
81163

82164
/*
83-
* i386 code reported the skew here, but the
84-
* count registers were almost certainly out of sync
85-
* so no point in alarming people
165+
* Retry, if there is a chance to do so.
86166
*/
87-
pr_cont("done.\n");
167+
if (atomic_read(&test_runs) > 0)
168+
goto retry;
88169
}
89170

171+
/*
172+
* Freshly booted CPUs call into this:
173+
*/
90174
void synchronise_count_slave(int cpu)
91175
{
92-
int i;
93-
unsigned long flags;
176+
uint32_t cur_max_warp, gbl_max_warp, count;
177+
int cpus = 2;
94178

95-
local_irq_save(flags);
179+
if (!cpu_has_counter || !mips_hpt_frequency)
180+
return;
96181

182+
/* Kick the control CPU into the counter synchronization function */
183+
smp_call_function_single(cpumask_first(cpu_online_mask),
184+
check_counter_sync_source,
185+
(unsigned long *)(unsigned long)cpu, 0);
186+
retry:
97187
/*
98-
* Not every cpu is online at the time this gets called,
99-
* so we first wait for the master to say everyone is ready
188+
* Register this CPU's participation and wait for the
189+
* source CPU to start the measurement:
100190
*/
191+
atomic_inc(&start_count);
192+
while (atomic_read(&start_count) != cpus)
193+
cpu_relax();
101194

102-
for (i = 0; i < NR_LOOPS; i++) {
103-
atomic_inc(&count_count_start);
104-
while (atomic_read(&count_count_start) != 2)
105-
mb();
195+
cur_max_warp = check_counter_warp();
106196

107-
/*
108-
* Everyone initialises count in the last loop:
109-
*/
110-
if (i == NR_LOOPS-1)
111-
write_c0_count(initcount);
197+
/*
198+
* Store the maximum observed warp value for a potential retry:
199+
*/
200+
gbl_max_warp = max_warp;
201+
202+
/*
203+
* Ok, we are done:
204+
*/
205+
atomic_inc(&stop_count);
206+
207+
/*
208+
* Wait for the source CPU to print stuff:
209+
*/
210+
while (atomic_read(&stop_count) != cpus)
211+
cpu_relax();
112212

113-
atomic_inc(&count_count_stop);
114-
while (atomic_read(&count_count_stop) != 2)
115-
mb();
213+
/*
214+
* Reset it for the next sync test:
215+
*/
216+
atomic_set(&stop_count, 0);
217+
218+
/*
219+
* Check the number of remaining test runs. If not zero, the test
220+
* failed and a retry with adjusted counter is possible. If zero the
221+
* test was either successful or failed terminally.
222+
*/
223+
if (!atomic_read(&test_runs)) {
224+
/* Arrange for an interrupt in a short while */
225+
write_c0_compare(read_c0_count() + COUNTON);
226+
return;
116227
}
117-
/* Arrange for an interrupt in a short while */
118-
write_c0_compare(read_c0_count() + COUNTON);
119228

120-
local_irq_restore(flags);
229+
/*
230+
* If the warp value of this CPU is 0, then the other CPU
231+
* observed time going backwards so this counter was ahead and
232+
* needs to move backwards.
233+
*/
234+
if (!cur_max_warp)
235+
cur_max_warp = -gbl_max_warp;
236+
237+
count = read_c0_count();
238+
count += cur_max_warp;
239+
write_c0_count(count);
240+
241+
pr_debug("Counter compensate: CPU%u observed %d warp\n", cpu, cur_max_warp);
242+
243+
goto retry;
244+
121245
}
122-
#undef NR_LOOPS

0 commit comments

Comments
 (0)