Commit 01dec6f

Add C11 standard atomic support (#1645)
* Add runtime support for stdatomics
* Fix lock calculation and enable atomic_flag support
1 parent 86aab81 commit 01dec6f
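
For context, application code like the following hypothetical sketch (not part of the commit) can now use plain C11 <stdatomic.h> operations on the RP2040; the compiler lowers them to the out-of-line helpers this commit adds.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include "pico/stdlib.h"

static _Atomic uint32_t events;     // updated from timer-interrupt and thread context

static bool on_timer(repeating_timer_t *rt) {
    (void)rt;
    atomic_fetch_add(&events, 1);   // lowered to a __atomic_fetch_add_4 call on armv6-m
    return true;                    // keep the repeating timer running
}

int main(void) {
    stdio_init_all();
    repeating_timer_t timer;
    add_repeating_timer_ms(10, on_timer, NULL, &timer);
    while (true) {
        sleep_ms(1000);
        printf("events: %u\n", (unsigned) atomic_load(&events));
    }
}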

File tree

5 files changed: +387 −0 lines changed

src/rp2_common/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -51,6 +51,7 @@ if (NOT PICO_BARE_METAL)
     pico_add_subdirectory(pico_malloc)
     pico_add_subdirectory(pico_printf)
     pico_add_subdirectory(pico_rand)
+    pico_add_subdirectory(pico_atomic)

     pico_add_subdirectory(pico_stdio)
     pico_add_subdirectory(pico_stdio_semihosting)
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
if (NOT TARGET pico_atomic)
    pico_add_library(pico_atomic)

    target_sources(pico_atomic INTERFACE
            ${CMAKE_CURRENT_LIST_DIR}/pico_atomic.c
    )

    target_include_directories(pico_atomic_headers INTERFACE ${CMAKE_CURRENT_LIST_DIR}/include)

    target_link_libraries(pico_atomic INTERFACE pico_sync)
endif()
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
/*
 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
 * Copyright (c) 2024 Stephen Street ([email protected]).
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#ifndef __STDATOMIC_H
#define __STDATOMIC_H

#include_next <stdatomic.h>

#undef atomic_flag_test_and_set
#undef atomic_flag_test_and_set_explicit
#undef atomic_flag_clear
#undef atomic_flag_clear_explicit

extern _Bool __atomic_test_and_set_m0(volatile void *mem, int model);
extern void __atomic_clear_m0(volatile void *mem, int model);

#define atomic_flag_test_and_set(PTR) __atomic_test_and_set_m0((PTR), __ATOMIC_SEQ_CST)
#define atomic_flag_test_and_set_explicit(PTR, MO) __atomic_test_and_set_m0((PTR), (MO))

#define atomic_flag_clear(PTR) __atomic_clear_m0((PTR), __ATOMIC_SEQ_CST)
#define atomic_flag_clear_explicit(PTR, MO) __atomic_clear_m0((PTR), (MO))

#endif
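
With the atomic_flag macros redirected to __atomic_test_and_set_m0()/__atomic_clear_m0(), a flag can arbitrate a critical section between the two cores. A minimal sketch of hypothetical application code (not part of the commit):

#include <stdatomic.h>
#include <stdio.h>
#include "pico/stdlib.h"
#include "pico/multicore.h"

static atomic_flag lock = ATOMIC_FLAG_INIT;
static int shared_total;                        // protected by 'lock'

static void bump(int n) {
    while (atomic_flag_test_and_set(&lock))     // spin until we own the flag
        tight_loop_contents();
    shared_total += n;
    atomic_flag_clear(&lock);                   // release the flag
}

static void core1_entry(void) {
    for (int i = 0; i < 1000; i++)
        bump(1);
}

int main(void) {
    stdio_init_all();
    multicore_launch_core1(core1_entry);
    for (int i = 0; i < 1000; i++)
        bump(1);
    sleep_ms(100);                              // crude wait for core 1; expect total == 2000
    printf("total = %d\n", shared_total);
    return 0;
}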
Lines changed: 345 additions & 0 deletions
@@ -0,0 +1,345 @@
/*
 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
 * Copyright (c) 2024 Stephen Street ([email protected]).
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <stdbool.h>
#include <stdint.h>

#include "hardware/address_mapped.h"
#include "hardware/regs/watchdog.h"
#include "hardware/sync.h"

#include "pico/config.h"

#ifndef __optimize
#define __optimize __attribute__((optimize("-Os")))
#endif

/* Must be powers of 2 */
#define ATOMIC_STRIPE 4UL
#define ATOMIC_LOCKS 16UL
#define ATOMIC_LOCK_WIDTH 2UL
#define ATOMIC_LOCK_IDX_Pos ((sizeof(unsigned long) * 8) - (__builtin_clz(ATOMIC_STRIPE - 1)))
#define ATOMIC_LOCK_IDX_Msk (ATOMIC_LOCKS - 1UL)
#define ATOMIC_LOCK_REG ((io_rw_32 *)(WATCHDOG_BASE + WATCHDOG_SCRATCH3_OFFSET))

static __used __attribute__((section(".preinit_array.00030"))) void __atomic_init(void) {
    *ATOMIC_LOCK_REG = 0;
}

/*
  To eliminate interference with existing hardware spinlock usage and to reduce multicore
  contention on unique atomic variables, we use one of the watchdog scratch registers
  (WATCHDOG_SCRATCH3) to implement 16 two-bit multicore locks via a variation of Dekker's
  algorithm (see https://en.wikipedia.org/wiki/Dekker%27s_algorithm). The lock is selected
  as a function of the variable address and the stripe width, which hashes variable
  addresses to lock numbers.
*/
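
/*
  Example: with ATOMIC_STRIPE == 4, __builtin_clz(3) is 30 on this 32-bit target, so
  ATOMIC_LOCK_IDX_Pos is 2 and a variable at, say, 0x20000124 hashes to lock index
  (0x20000124 >> 2) & 15 == 9, i.e. bits 18 (core 0) and 19 (core 1) of WATCHDOG_SCRATCH3
  arbitrate access to that variable.
*/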
static __optimize uint32_t __atomic_lock(volatile void *mem) {
    const uint32_t core = get_core_num();
    const uint32_t lock_idx = (((uintptr_t)mem) >> ATOMIC_LOCK_IDX_Pos) & ATOMIC_LOCK_IDX_Msk;
    const uint32_t lock_pos = lock_idx * ATOMIC_LOCK_WIDTH;
    const uint32_t lock_mask = ((1UL << ATOMIC_LOCK_WIDTH) - 1) << lock_pos;
    const uint32_t locked_mask = 1UL << (lock_pos + core);

    uint32_t state = save_and_disable_interrupts();
    while (true) {

        /* First set the bit */
        hw_set_bits(ATOMIC_LOCK_REG, locked_mask);
        __dmb();

        /* Did we get the lock? */
        if ((*ATOMIC_LOCK_REG & lock_mask) == locked_mask)
            break;

        /* Nope, clear our side */
        __dmb();
        hw_clear_bits(ATOMIC_LOCK_REG, locked_mask);

        /* Need to break any ties if the cores are in lock step, is this really required? */
        for (uint32_t i = core * 2; i > 0; --i)
            asm volatile ("nop");
    }

    return state;
}

static __optimize void __atomic_unlock(volatile void *mem, uint32_t state) {
    const uint32_t lock_idx = (((uintptr_t)mem) >> ATOMIC_LOCK_IDX_Pos) & ATOMIC_LOCK_IDX_Msk;
    const uint32_t lock_pos = lock_idx * ATOMIC_LOCK_WIDTH;
    const uint32_t locked_mask = 1UL << (lock_pos + get_core_num());

    __dmb();
    hw_clear_bits(ATOMIC_LOCK_REG, locked_mask);
    restore_interrupts(state);
}
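
/*
  The helpers below are the out-of-line __atomic_* routines that GCC calls on armv6-m,
  which has no exclusive load/store instructions: one set each for 1-, 2-, 4- and 8-byte
  objects, plus 64-bit load/store and the atomic_flag hooks declared in stdatomic.h above.
*/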

__optimize uint8_t __atomic_fetch_add_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr += val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint8_t __atomic_fetch_sub_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr -= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint8_t __atomic_fetch_and_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr &= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint8_t __atomic_fetch_or_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr |= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint8_t __atomic_exchange_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr = val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize bool __atomic_compare_exchange_1(volatile void *mem, void *expected, uint8_t desired, __unused bool weak, __unused int success, __unused int failure) {
    bool result = false;
    volatile uint8_t *ptr = mem;
    uint8_t *e_ptr = expected;
    uint32_t state = __atomic_lock(mem);
    if (*ptr == *e_ptr) {
        *ptr = desired;
        result = true;
    } else
        *e_ptr = *ptr;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_fetch_add_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr += val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_fetch_sub_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr -= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_fetch_and_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr &= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_fetch_or_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr |= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_exchange_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr = val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize bool __atomic_compare_exchange_2(volatile void *mem, void *expected, uint16_t desired, __unused bool weak, __unused int success, __unused int failure) {
    bool result = false;
    volatile uint16_t *ptr = mem;
    uint16_t *e_ptr = expected;
    uint32_t state = __atomic_lock(mem);
    if (*ptr == *e_ptr) {
        *ptr = desired;
        result = true;
    } else
        *e_ptr = *ptr;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_fetch_add_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr += val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_fetch_sub_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr -= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_fetch_and_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr &= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_fetch_or_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr |= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_exchange_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr = val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize bool __atomic_compare_exchange_4(volatile void *mem, void *expected, uint32_t desired, __unused bool weak, __unused int success, __unused int failure) {
    bool result = false;
    volatile uint32_t *ptr = mem;
    uint32_t *e_ptr = expected;
    uint32_t state = __atomic_lock(mem);
    if (*ptr == *e_ptr) {
        *ptr = desired;
        result = true;
    } else
        *e_ptr = *ptr;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_fetch_add_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr += val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_fetch_sub_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr -= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_fetch_and_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr &= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_fetch_or_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr |= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_exchange_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr = val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize bool __atomic_compare_exchange_8(volatile void *mem, void *expected, uint64_t desired, __unused bool weak, __unused int success, __unused int failure) {
    bool result = false;
    volatile uint64_t *ptr = mem;
    uint64_t *e_ptr = expected;
    uint32_t state = __atomic_lock(mem);
    if (*ptr == *e_ptr) {
        *ptr = desired;
        result = true;
    } else
        *e_ptr = *ptr;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_load_8(volatile void *mem, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    __atomic_unlock(mem, state);
    return result;
}

__optimize void __atomic_store_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    *ptr = val;
    __atomic_unlock(mem, state);
}

__optimize bool __atomic_test_and_set_m0(volatile void *mem, __unused int model) {
    volatile bool *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    volatile bool result = *ptr;
    *ptr = true;
    __atomic_unlock(mem, state);
    return result;
}

__optimize void __atomic_clear_m0(volatile void *mem, __unused int model) {
    volatile bool *ptr = mem;
    *ptr = false;
    __dmb();
}
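
Taken together, these helpers make even 64-bit C11 atomics usable across both cores. A minimal sketch of hypothetical application code (not part of the commit) exercising the 8-byte path:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include "pico/stdlib.h"
#include "pico/multicore.h"

static _Atomic uint64_t total;     // 8-byte atomic: served by __atomic_fetch_add_8/__atomic_load_8

static void core1_entry(void) {
    for (int i = 0; i < 100000; i++)
        atomic_fetch_add(&total, 3);
}

int main(void) {
    stdio_init_all();
    multicore_launch_core1(core1_entry);
    for (int i = 0; i < 100000; i++)
        atomic_fetch_add(&total, 1);
    sleep_ms(500);                 // crude wait for core 1; a real program would synchronise properly
    printf("total = %llu\n", (unsigned long long) atomic_load(&total));   // expect 400000
    return 0;
}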
