|
2 | 2 | /*
|
3 | 3 | * Count register synchronisation.
|
4 | 4 | *
|
5 |
| - * All CPUs will have their count registers synchronised to the CPU0 next time |
6 |
| - * value. This can cause a small timewarp for CPU0. All other CPU's should |
7 |
| - * not have done anything significant (but they may have had interrupts |
8 |
| - * enabled briefly - prom_smp_finish() should not be responsible for enabling |
9 |
| - * interrupts...) |
| 5 | + * Derived from arch/x86/kernel/tsc_sync.c |
| 6 | + * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar |
10 | 7 | */
|
11 | 8 |
|
12 | 9 | #include <linux/kernel.h>
|
13 | 10 | #include <linux/irqflags.h>
|
14 | 11 | #include <linux/cpumask.h>
|
| 12 | +#include <linux/atomic.h> |
| 13 | +#include <linux/nmi.h> |
| 14 | +#include <linux/smp.h> |
| 15 | +#include <linux/spinlock.h> |
15 | 16 |
|
16 | 17 | #include <asm/r4k-timer.h>
|
17 |
| -#include <linux/atomic.h> |
18 |
| -#include <asm/barrier.h> |
19 | 18 | #include <asm/mipsregs.h>
|
| 19 | +#include <asm/time.h> |
20 | 20 |
|
21 |
| -static unsigned int initcount = 0; |
22 |
| -static atomic_t count_count_start = ATOMIC_INIT(0); |
23 |
| -static atomic_t count_count_stop = ATOMIC_INIT(0); |
24 |
| - |
25 |
| -#define COUNTON 100 |
26 |
| -#define NR_LOOPS 3 |
27 |
| - |
28 |
| -void synchronise_count_master(int cpu) |
29 |
| -{ |
30 |
| - int i; |
31 |
| - unsigned long flags; |
32 |
| - |
33 |
| - pr_info("Synchronize counters for CPU %u: ", cpu); |
| 21 | +#define COUNTON 100 |
| 22 | +#define NR_LOOPS 3 |
| 23 | +#define LOOP_TIMEOUT 20 |
34 | 24 |
|
35 |
| - local_irq_save(flags); |
| 25 | +/* |
| 26 | + * Entry/exit counters that make sure that both CPUs |
| 27 | + * run the measurement code at once: |
| 28 | + */ |
| 29 | +static atomic_t start_count; |
| 30 | +static atomic_t stop_count; |
| 31 | +static atomic_t test_runs; |
36 | 32 |
|
37 |
| - /* |
38 |
| - * We loop a few times to get a primed instruction cache, |
39 |
| - * then the last pass is more or less synchronised and |
40 |
| - * the master and slaves each set their cycle counters to a known |
41 |
| - * value all at once. This reduces the chance of having random offsets |
42 |
| - * between the processors, and guarantees that the maximum |
43 |
| - * delay between the cycle counters is never bigger than |
44 |
| - * the latency of information-passing (cachelines) between |
45 |
| - * two CPUs. |
46 |
| - */ |
| 33 | +/* |
| 34 | + * We use a raw spinlock in this exceptional case, because |
| 35 | + * we want to have the fastest, inlined, non-debug version |
| 36 | + * of a critical section, to be able to prove counter time-warps: |
| 37 | + */ |
| 38 | +static arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED; |
47 | 39 |
|
48 |
| - for (i = 0; i < NR_LOOPS; i++) { |
49 |
| - /* slaves loop on '!= 2' */ |
50 |
| - while (atomic_read(&count_count_start) != 1) |
51 |
| - mb(); |
52 |
| - atomic_set(&count_count_stop, 0); |
53 |
| - smp_wmb(); |
| 40 | +static uint32_t last_counter; |
| 41 | +static uint32_t max_warp; |
| 42 | +static int nr_warps; |
| 43 | +static int random_warps; |
54 | 44 |
|
55 |
| - /* Let the slave writes its count register */ |
56 |
| - atomic_inc(&count_count_start); |
| 45 | +/* |
| 46 | + * Counter warp measurement loop running on both CPUs. |
| 47 | + */ |
| 48 | +static uint32_t check_counter_warp(void) |
| 49 | +{ |
| 50 | + uint32_t start, now, prev, end, cur_max_warp = 0; |
| 51 | + int i, cur_warps = 0; |
57 | 52 |
|
58 |
| - /* Count will be initialised to current timer */ |
59 |
| - if (i == 1) |
60 |
| - initcount = read_c0_count(); |
| 53 | + start = read_c0_count(); |
| 54 | + end = start + (uint32_t) mips_hpt_frequency / 1000 * LOOP_TIMEOUT; |
61 | 55 |
|
| 56 | + for (i = 0; ; i++) { |
62 | 57 | /*
|
63 |
| - * Everyone initialises count in the last loop: |
| 58 | + * We take the global lock, measure counter, save the |
| 59 | + * previous counter that was measured (possibly on |
| 60 | + * another CPU) and update the previous counter timestamp. |
64 | 61 | */
|
65 |
| - if (i == NR_LOOPS-1) |
66 |
| - write_c0_count(initcount); |
| 62 | + arch_spin_lock(&sync_lock); |
| 63 | + prev = last_counter; |
| 64 | + now = read_c0_count(); |
| 65 | + last_counter = now; |
| 66 | + arch_spin_unlock(&sync_lock); |
67 | 67 |
|
68 | 68 | /*
|
69 |
| - * Wait for slave to leave the synchronization point: |
| 69 | + * Be nice every now and then (and also check whether |
| 70 | + * measurement is done [we also insert a 10 million |
| 71 | + * loops safety exit, so we dont lock up in case the |
| 72 | + * counter is totally broken]): |
70 | 73 | */
|
71 |
| - while (atomic_read(&count_count_stop) != 1) |
72 |
| - mb(); |
73 |
| - atomic_set(&count_count_start, 0); |
74 |
| - smp_wmb(); |
75 |
| - atomic_inc(&count_count_stop); |
| 74 | + if (unlikely(!(i & 7))) { |
| 75 | + if (now > end || i > 10000000) |
| 76 | + break; |
| 77 | + cpu_relax(); |
| 78 | + touch_nmi_watchdog(); |
| 79 | + } |
| 80 | + /* |
| 81 | + * Outside the critical section we can now see whether |
| 82 | + * we saw a time-warp of the counter going backwards: |
| 83 | + */ |
| 84 | + if (unlikely(prev > now)) { |
| 85 | + arch_spin_lock(&sync_lock); |
| 86 | + max_warp = max(max_warp, prev - now); |
| 87 | + cur_max_warp = max_warp; |
| 88 | + /* |
| 89 | + * Check whether this bounces back and forth. Only |
| 90 | + * one CPU should observe time going backwards. |
| 91 | + */ |
| 92 | + if (cur_warps != nr_warps) |
| 93 | + random_warps++; |
| 94 | + nr_warps++; |
| 95 | + cur_warps = nr_warps; |
| 96 | + arch_spin_unlock(&sync_lock); |
| 97 | + } |
| 98 | + } |
| 99 | + WARN(!(now-start), |
| 100 | + "Warning: zero counter calibration delta: %d [max: %d]\n", |
| 101 | + now-start, end-start); |
| 102 | + return cur_max_warp; |
| 103 | +} |
| 104 | + |
| 105 | +/* |
| 106 | + * The freshly booted CPU initiates this via an async SMP function call. |
| 107 | + */ |
| 108 | +static void check_counter_sync_source(void *__cpu) |
| 109 | +{ |
| 110 | + unsigned int cpu = (unsigned long)__cpu; |
| 111 | + int cpus = 2; |
| 112 | + |
| 113 | + atomic_set(&test_runs, NR_LOOPS); |
| 114 | +retry: |
| 115 | + /* Wait for the target to start. */ |
| 116 | + while (atomic_read(&start_count) != cpus - 1) |
| 117 | + cpu_relax(); |
| 118 | + |
| 119 | + /* |
| 120 | + * Trigger the target to continue into the measurement too: |
| 121 | + */ |
| 122 | + atomic_inc(&start_count); |
| 123 | + |
| 124 | + check_counter_warp(); |
| 125 | + |
| 126 | + while (atomic_read(&stop_count) != cpus-1) |
| 127 | + cpu_relax(); |
| 128 | + |
| 129 | + /* |
| 130 | + * If the test was successful set the number of runs to zero and |
| 131 | + * stop. If not, decrement the number of runs an check if we can |
| 132 | + * retry. In case of random warps no retry is attempted. |
| 133 | + */ |
| 134 | + if (!nr_warps) { |
| 135 | + atomic_set(&test_runs, 0); |
| 136 | + |
| 137 | + pr_info("Counter synchronization [CPU#%d -> CPU#%u]: passed\n", |
| 138 | + smp_processor_id(), cpu); |
| 139 | + } else if (atomic_dec_and_test(&test_runs) || random_warps) { |
| 140 | + /* Force it to 0 if random warps brought us here */ |
| 141 | + atomic_set(&test_runs, 0); |
| 142 | + |
| 143 | + pr_info("Counter synchronization [CPU#%d -> CPU#%u]:\n", |
| 144 | + smp_processor_id(), cpu); |
| 145 | + pr_info("Measured %d cycles counter warp between CPUs", max_warp); |
| 146 | + if (random_warps) |
| 147 | + pr_warn("Counter warped randomly between CPUs\n"); |
76 | 148 | }
|
77 |
| - /* Arrange for an interrupt in a short while */ |
78 |
| - write_c0_compare(read_c0_count() + COUNTON); |
79 | 149 |
|
80 |
| - local_irq_restore(flags); |
| 150 | + /* |
| 151 | + * Reset it - just in case we boot another CPU later: |
| 152 | + */ |
| 153 | + atomic_set(&start_count, 0); |
| 154 | + random_warps = 0; |
| 155 | + nr_warps = 0; |
| 156 | + max_warp = 0; |
| 157 | + last_counter = 0; |
| 158 | + |
| 159 | + /* |
| 160 | + * Let the target continue with the bootup: |
| 161 | + */ |
| 162 | + atomic_inc(&stop_count); |
81 | 163 |
|
82 | 164 | /*
|
83 |
| - * i386 code reported the skew here, but the |
84 |
| - * count registers were almost certainly out of sync |
85 |
| - * so no point in alarming people |
| 165 | + * Retry, if there is a chance to do so. |
86 | 166 | */
|
87 |
| - pr_cont("done.\n"); |
| 167 | + if (atomic_read(&test_runs) > 0) |
| 168 | + goto retry; |
88 | 169 | }
|
89 | 170 |
|
| 171 | +/* |
| 172 | + * Freshly booted CPUs call into this: |
| 173 | + */ |
90 | 174 | void synchronise_count_slave(int cpu)
|
91 | 175 | {
|
92 |
| - int i; |
93 |
| - unsigned long flags; |
| 176 | + uint32_t cur_max_warp, gbl_max_warp, count; |
| 177 | + int cpus = 2; |
94 | 178 |
|
95 |
| - local_irq_save(flags); |
| 179 | + if (!cpu_has_counter || !mips_hpt_frequency) |
| 180 | + return; |
96 | 181 |
|
| 182 | + /* Kick the control CPU into the counter synchronization function */ |
| 183 | + smp_call_function_single(cpumask_first(cpu_online_mask), |
| 184 | + check_counter_sync_source, |
| 185 | + (unsigned long *)(unsigned long)cpu, 0); |
| 186 | +retry: |
97 | 187 | /*
|
98 |
| - * Not every cpu is online at the time this gets called, |
99 |
| - * so we first wait for the master to say everyone is ready |
| 188 | + * Register this CPU's participation and wait for the |
| 189 | + * source CPU to start the measurement: |
100 | 190 | */
|
| 191 | + atomic_inc(&start_count); |
| 192 | + while (atomic_read(&start_count) != cpus) |
| 193 | + cpu_relax(); |
101 | 194 |
|
102 |
| - for (i = 0; i < NR_LOOPS; i++) { |
103 |
| - atomic_inc(&count_count_start); |
104 |
| - while (atomic_read(&count_count_start) != 2) |
105 |
| - mb(); |
| 195 | + cur_max_warp = check_counter_warp(); |
106 | 196 |
|
107 |
| - /* |
108 |
| - * Everyone initialises count in the last loop: |
109 |
| - */ |
110 |
| - if (i == NR_LOOPS-1) |
111 |
| - write_c0_count(initcount); |
| 197 | + /* |
| 198 | + * Store the maximum observed warp value for a potential retry: |
| 199 | + */ |
| 200 | + gbl_max_warp = max_warp; |
| 201 | + |
| 202 | + /* |
| 203 | + * Ok, we are done: |
| 204 | + */ |
| 205 | + atomic_inc(&stop_count); |
| 206 | + |
| 207 | + /* |
| 208 | + * Wait for the source CPU to print stuff: |
| 209 | + */ |
| 210 | + while (atomic_read(&stop_count) != cpus) |
| 211 | + cpu_relax(); |
112 | 212 |
|
113 |
| - atomic_inc(&count_count_stop); |
114 |
| - while (atomic_read(&count_count_stop) != 2) |
115 |
| - mb(); |
| 213 | + /* |
| 214 | + * Reset it for the next sync test: |
| 215 | + */ |
| 216 | + atomic_set(&stop_count, 0); |
| 217 | + |
| 218 | + /* |
| 219 | + * Check the number of remaining test runs. If not zero, the test |
| 220 | + * failed and a retry with adjusted counter is possible. If zero the |
| 221 | + * test was either successful or failed terminally. |
| 222 | + */ |
| 223 | + if (!atomic_read(&test_runs)) { |
| 224 | + /* Arrange for an interrupt in a short while */ |
| 225 | + write_c0_compare(read_c0_count() + COUNTON); |
| 226 | + return; |
116 | 227 | }
|
117 |
| - /* Arrange for an interrupt in a short while */ |
118 |
| - write_c0_compare(read_c0_count() + COUNTON); |
119 | 228 |
|
120 |
| - local_irq_restore(flags); |
| 229 | + /* |
| 230 | + * If the warp value of this CPU is 0, then the other CPU |
| 231 | + * observed time going backwards so this counter was ahead and |
| 232 | + * needs to move backwards. |
| 233 | + */ |
| 234 | + if (!cur_max_warp) |
| 235 | + cur_max_warp = -gbl_max_warp; |
| 236 | + |
| 237 | + count = read_c0_count(); |
| 238 | + count += cur_max_warp; |
| 239 | + write_c0_count(count); |
| 240 | + |
| 241 | + pr_debug("Counter compensate: CPU%u observed %d warp\n", cpu, cur_max_warp); |
| 242 | + |
| 243 | + goto retry; |
| 244 | + |
121 | 245 | }
|
122 |
| -#undef NR_LOOPS |
|
0 commit comments