Skip to content

Commit 70db40e

Browse files
authored
Merge pull request #8 from talubik/talubik/atomics
Atomic operations
2 parents 8baff3b + 7874520 commit 70db40e

File tree

3 files changed

+355
-2
lines changed

3 files changed

+355
-2
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1855,7 +1855,7 @@ if (ENABLE_VORTEX)
18551855
message(FATAL_ERROR "should set 'VORTEX_PREFIX' option")
18561856
endif()
18571857
set(BUILD_VORTEX 1)
1858-
set(VORTEX_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store cl_khr_int64 cl_khr_fp64")
1858+
set(VORTEX_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store cl_khr_int64 cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics")
18591859
set(VORTEX_DEVICE_CL_VERSION_MAJOR 1)
18601860
set(VORTEX_DEVICE_CL_VERSION_MINOR 2)
18611861
set(VORTEX_DEVICE_CL_VERSION "120")

lib/kernel/vortex/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ foreach(FILE printf.c printf_base.c
4141
list(REMOVE_ITEM KERNEL_SOURCES "${FILE}")
4242
endforeach()
4343

44-
foreach(FILE workitems.c printf.c barrier.c)
44+
foreach(FILE workitems.c printf.c barrier.c atomics.c)
4545
list(REMOVE_ITEM KERNEL_SOURCES "${FILE}")
4646
list(APPEND KERNEL_SOURCES "vortex/${FILE}")
4747
endforeach()

lib/kernel/vortex/atomics.c

Lines changed: 353 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,353 @@
1+
// atomic_add / atomic_inc / atomic_dec / atomic_sub
2+
static inline int _vx_atomic_add_asm(volatile void *addr, int value)
3+
{
4+
int old_value;
5+
__asm__ volatile(
6+
"amoadd.w %0, %2, (%1)"
7+
: "=r"(old_value)
8+
: "r"(addr), "r"(value)
9+
: "memory");
10+
return old_value;
11+
}
12+
13+
// Mangled symbol for _cl_atomic_add(volatile CLglobal int *, int).
// Forwards to _vx_atomic_add_asm and returns its result unchanged.
int _Z14_cl_atomic_addPU8CLglobalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_add_asm(ptr, val);
  return previous;
}
17+
18+
// Mangled symbol for _cl_atomic_add(volatile CLlocal int *, int).
// Forwards to _vx_atomic_add_asm and returns its result unchanged.
int _Z14_cl_atomic_addPU7CLlocalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_add_asm(ptr, val);
  return previous;
}
22+
23+
// Mangled symbol for _cl_atomic_add(volatile CLglobal unsigned int *,
// unsigned int). The operand is cast to int for _vx_atomic_add_asm and the
// helper's int result is converted back to unsigned int.
unsigned int _Z14_cl_atomic_addPU8CLglobalVjj(volatile void *ptr, unsigned int val)
{
  const int addend = (int)val;
  return (unsigned int)_vx_atomic_add_asm(ptr, addend);
}
27+
28+
// Mangled symbol for _cl_atomic_add(volatile CLlocal unsigned int *,
// unsigned int). The operand is cast to int for _vx_atomic_add_asm and the
// helper's int result is converted back to unsigned int.
unsigned int _Z14_cl_atomic_addPU7CLlocalVjj(volatile void *ptr, unsigned int val)
{
  const int addend = (int)val;
  return (unsigned int)_vx_atomic_add_asm(ptr, addend);
}
32+
33+
// atomic_inc
34+
35+
// Mangled symbol for _cl_atomic_inc(volatile CLglobal int *).
// Implemented as an atomic add of +1 through _vx_atomic_add_asm.
int _Z14_cl_atomic_incPU8CLglobalVi(volatile void *ptr)
{
  const int step = 1;
  return _vx_atomic_add_asm(ptr, step);
}
39+
40+
// Mangled symbol for _cl_atomic_inc(volatile CLlocal int *).
// Implemented as an atomic add of +1 through _vx_atomic_add_asm.
int _Z14_cl_atomic_incPU7CLlocalVi(volatile void *ptr)
{
  const int step = 1;
  return _vx_atomic_add_asm(ptr, step);
}
44+
45+
// Mangled symbol for _cl_atomic_inc(volatile CLglobal unsigned int *).
// Implemented as an atomic add of +1 through _vx_atomic_add_asm; the helper's
// int result is converted to unsigned int.
unsigned int _Z14_cl_atomic_incPU8CLglobalVj(volatile void *ptr)
{
  const int step = 1;
  return (unsigned int)_vx_atomic_add_asm(ptr, step);
}
49+
50+
// Mangled symbol for _cl_atomic_inc(volatile CLlocal unsigned int *).
// Implemented as an atomic add of +1 through _vx_atomic_add_asm; the helper's
// int result is converted to unsigned int.
unsigned int _Z14_cl_atomic_incPU7CLlocalVj(volatile void *ptr)
{
  const int step = 1;
  return (unsigned int)_vx_atomic_add_asm(ptr, step);
}
54+
55+
// atomic_dec
56+
57+
// Mangled symbol for _cl_atomic_dec(volatile CLglobal int *).
// Implemented as an atomic add of -1 through _vx_atomic_add_asm.
int _Z14_cl_atomic_decPU8CLglobalVi(volatile void *ptr)
{
  const int step = -1;
  return _vx_atomic_add_asm(ptr, step);
}
61+
62+
// Mangled symbol for _cl_atomic_dec(volatile CLlocal int *).
// Implemented as an atomic add of -1 through _vx_atomic_add_asm.
int _Z14_cl_atomic_decPU7CLlocalVi(volatile void *ptr)
{
  const int step = -1;
  return _vx_atomic_add_asm(ptr, step);
}
66+
67+
// Mangled symbol for _cl_atomic_dec(volatile CLlocal unsigned int *).
// Implemented as an atomic add of -1 through _vx_atomic_add_asm; the helper's
// int result is converted to unsigned int.
unsigned int _Z14_cl_atomic_decPU7CLlocalVj(volatile void *ptr)
{
  const int step = -1;
  return (unsigned int)_vx_atomic_add_asm(ptr, step);
}
71+
72+
// Mangled symbol for _cl_atomic_dec(volatile CLglobal unsigned int *).
// Implemented as an atomic add of -1 through _vx_atomic_add_asm; the helper's
// int result is converted to unsigned int.
unsigned int _Z14_cl_atomic_decPU8CLglobalVj(volatile void *ptr)
{
  const int step = -1;
  return (unsigned int)_vx_atomic_add_asm(ptr, step);
}
76+
77+
// atomic_sub
78+
79+
// Mangled symbol for _cl_atomic_sub(volatile CLlocal int *, int).
// Implemented as an atomic add of the negated operand through
// _vx_atomic_add_asm; returns the helper's result.
//
// The negation is done in unsigned arithmetic: `-val` is signed overflow
// (undefined behavior) when val is INT_MIN, while 0u - (unsigned int)val
// wraps and produces the same 32-bit pattern the add needs.
int _Z14_cl_atomic_subPU7CLlocalVii(volatile void *ptr, int val)
{
  const unsigned int negated = 0u - (unsigned int)val;
  return _vx_atomic_add_asm(ptr, (int)negated);
}
83+
84+
// Mangled symbol for _cl_atomic_sub(volatile CLglobal int *, int).
// Implemented as an atomic add of the negated operand through
// _vx_atomic_add_asm; returns the helper's result.
//
// The negation is done in unsigned arithmetic: `-val` is signed overflow
// (undefined behavior) when val is INT_MIN, while 0u - (unsigned int)val
// wraps and produces the same 32-bit pattern the add needs.
int _Z14_cl_atomic_subPU8CLglobalVii(volatile void *ptr, int val)
{
  const unsigned int negated = 0u - (unsigned int)val;
  return _vx_atomic_add_asm(ptr, (int)negated);
}
88+
89+
// Mangled symbol for _cl_atomic_sub(volatile CLlocal unsigned int *,
// unsigned int). Implemented as an atomic add of the negated operand through
// _vx_atomic_add_asm; the helper's int result is converted to unsigned int.
//
// The operand is negated while still unsigned: casting to int first and then
// negating overflows (undefined behavior) when val is 0x80000000, whereas
// 0u - val wraps and produces the same 32-bit pattern the add needs.
unsigned int _Z14_cl_atomic_subPU7CLlocalVjj(volatile void *ptr, unsigned int val)
{
  const unsigned int negated = 0u - val;
  return (unsigned int)_vx_atomic_add_asm(ptr, (int)negated);
}
93+
94+
// Mangled symbol for _cl_atomic_sub(volatile CLglobal unsigned int *,
// unsigned int). Implemented as an atomic add of the negated operand through
// _vx_atomic_add_asm; the helper's int result is converted to unsigned int.
//
// The operand is negated while still unsigned: casting to int first and then
// negating overflows (undefined behavior) when val is 0x80000000, whereas
// 0u - val wraps and produces the same 32-bit pattern the add needs.
unsigned int _Z14_cl_atomic_subPU8CLglobalVjj(volatile void *ptr, unsigned int val)
{
  const unsigned int negated = 0u - val;
  return (unsigned int)_vx_atomic_add_asm(ptr, (int)negated);
}
98+
99+
// atomic_max
100+
101+
static inline int _vx_atomic_max_asm(volatile void *addr, int value)
102+
{
103+
int old_value;
104+
__asm__ volatile(
105+
"amomax.w %0, %2, (%1)"
106+
: "=r"(old_value)
107+
: "r"(addr), "r"(value)
108+
: "memory");
109+
return old_value;
110+
}
111+
112+
// Mangled symbol for _cl_atomic_max(volatile CLlocal int *, int).
// Forwards to the signed helper _vx_atomic_max_asm and returns its result.
int _Z14_cl_atomic_maxPU7CLlocalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_max_asm(ptr, val);
  return previous;
}
116+
117+
// Mangled symbol for _cl_atomic_max(volatile CLglobal int *, int).
// Forwards to the signed helper _vx_atomic_max_asm and returns its result.
int _Z14_cl_atomic_maxPU8CLglobalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_max_asm(ptr, val);
  return previous;
}
121+
122+
static inline unsigned int _vx_atomic_maxu_asm(volatile void *addr, unsigned int value)
123+
{
124+
unsigned int old_value;
125+
__asm__ volatile(
126+
"amomaxu.w %0, %2, (%1)"
127+
: "=r"(old_value)
128+
: "r"(addr), "r"(value)
129+
: "memory");
130+
return old_value;
131+
}
132+
133+
// Mangled symbol for _cl_atomic_max(volatile CLlocal unsigned int *,
// unsigned int). Forwards to the unsigned helper _vx_atomic_maxu_asm and
// returns its result.
unsigned int _Z14_cl_atomic_maxPU7CLlocalVjj(volatile void *ptr, unsigned int val)
{
  const unsigned int previous = _vx_atomic_maxu_asm(ptr, val);
  return previous;
}
137+
138+
// Mangled symbol for _cl_atomic_max(volatile CLglobal unsigned int *,
// unsigned int). Forwards to the unsigned helper _vx_atomic_maxu_asm and
// returns its result.
unsigned int _Z14_cl_atomic_maxPU8CLglobalVjj(volatile void *ptr, unsigned int val)
{
  const unsigned int previous = _vx_atomic_maxu_asm(ptr, val);
  return previous;
}
142+
143+
// atomic_min
144+
145+
static inline int _vx_atomic_min_asm(volatile void *addr, int value)
146+
{
147+
int old_value;
148+
__asm__ volatile(
149+
"amomin.w %0, %2, (%1)"
150+
: "=r"(old_value)
151+
: "r"(addr), "r"(value)
152+
: "memory");
153+
return old_value;
154+
}
155+
156+
// Mangled symbol for _cl_atomic_min(volatile CLlocal int *, int).
// Forwards to the signed helper _vx_atomic_min_asm and returns its result.
int _Z14_cl_atomic_minPU7CLlocalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_min_asm(ptr, val);
  return previous;
}
160+
161+
// Mangled symbol for _cl_atomic_min(volatile CLglobal int *, int).
// Forwards to the signed helper _vx_atomic_min_asm and returns its result.
int _Z14_cl_atomic_minPU8CLglobalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_min_asm(ptr, val);
  return previous;
}
165+
166+
static inline unsigned int _vx_atomic_minu_asm(volatile void *addr, unsigned int value)
167+
{
168+
unsigned int old_value;
169+
__asm__ volatile(
170+
"amominu.w %0, %2, (%1)"
171+
: "=r"(old_value)
172+
: "r"(addr), "r"(value)
173+
: "memory");
174+
return old_value;
175+
}
176+
177+
// Mangled symbol for _cl_atomic_min(volatile CLlocal unsigned int *,
// unsigned int). Forwards to the unsigned helper _vx_atomic_minu_asm and
// returns its result.
unsigned int _Z14_cl_atomic_minPU7CLlocalVjj(volatile void *ptr, unsigned int val)
{
  const unsigned int previous = _vx_atomic_minu_asm(ptr, val);
  return previous;
}
181+
182+
// Mangled symbol for _cl_atomic_min(volatile CLglobal unsigned int *,
// unsigned int). Forwards to the unsigned helper _vx_atomic_minu_asm and
// returns its result.
unsigned int _Z14_cl_atomic_minPU8CLglobalVjj(volatile void *ptr, unsigned int val)
{
  const unsigned int previous = _vx_atomic_minu_asm(ptr, val);
  return previous;
}
186+
187+
// atomic_xor
188+
189+
static inline int _vx_atomic_xor_asm(volatile void *addr, int value)
190+
{
191+
int old_value;
192+
__asm__ volatile(
193+
"amoxor.w %0, %2, (%1)"
194+
: "=r"(old_value)
195+
: "r"(addr), "r"(value)
196+
: "memory");
197+
return old_value;
198+
}
199+
200+
// Mangled symbol for _cl_atomic_xor(volatile CLlocal int *, int).
// Forwards to _vx_atomic_xor_asm and returns its result unchanged.
int _Z14_cl_atomic_xorPU7CLlocalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_xor_asm(ptr, val);
  return previous;
}
204+
205+
// Mangled symbol for _cl_atomic_xor(volatile CLglobal int *, int).
// Forwards to _vx_atomic_xor_asm and returns its result unchanged.
int _Z14_cl_atomic_xorPU8CLglobalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_xor_asm(ptr, val);
  return previous;
}
209+
210+
// Mangled symbol for _cl_atomic_xor(volatile CLlocal unsigned int *,
// unsigned int). The mask is cast to int for _vx_atomic_xor_asm and the
// helper's int result is converted back to unsigned int.
unsigned int _Z14_cl_atomic_xorPU7CLlocalVjj(volatile void *ptr, unsigned int val)
{
  const int mask = (int)val;
  return (unsigned int)_vx_atomic_xor_asm(ptr, mask);
}
214+
215+
// Mangled symbol for _cl_atomic_xor(volatile CLglobal unsigned int *,
// unsigned int). The mask is cast to int for _vx_atomic_xor_asm and the
// helper's int result is converted back to unsigned int.
unsigned int _Z14_cl_atomic_xorPU8CLglobalVjj(volatile void *ptr, unsigned int val)
{
  const int mask = (int)val;
  return (unsigned int)_vx_atomic_xor_asm(ptr, mask);
}
219+
// atomic_or
220+
221+
static inline int _vx_atomic_or_asm(volatile void *addr, int value)
222+
{
223+
int old_value;
224+
__asm__ volatile(
225+
"amoor.w %0, %2, (%1)"
226+
: "=r"(old_value)
227+
: "r"(addr), "r"(value)
228+
: "memory");
229+
return old_value;
230+
}
231+
232+
// Mangled symbol for _cl_atomic_or(volatile CLlocal int *, int).
// Forwards to _vx_atomic_or_asm and returns its result unchanged.
int _Z13_cl_atomic_orPU7CLlocalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_or_asm(ptr, val);
  return previous;
}
236+
237+
// Mangled symbol for _cl_atomic_or(volatile CLglobal int *, int).
// Forwards to _vx_atomic_or_asm and returns its result unchanged.
int _Z13_cl_atomic_orPU8CLglobalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_or_asm(ptr, val);
  return previous;
}
241+
242+
// Mangled symbol for _cl_atomic_or(volatile CLlocal unsigned int *,
// unsigned int). The mask is cast to int for _vx_atomic_or_asm and the
// helper's int result is converted back to unsigned int.
unsigned int _Z13_cl_atomic_orPU7CLlocalVjj(volatile void *ptr, unsigned int val)
{
  const int mask = (int)val;
  return (unsigned int)_vx_atomic_or_asm(ptr, mask);
}
246+
247+
// Mangled symbol for _cl_atomic_or(volatile CLglobal unsigned int *,
// unsigned int). The mask is cast to int for _vx_atomic_or_asm and the
// helper's int result is converted back to unsigned int.
unsigned int _Z13_cl_atomic_orPU8CLglobalVjj(volatile void *ptr, unsigned int val)
{
  const int mask = (int)val;
  return (unsigned int)_vx_atomic_or_asm(ptr, mask);
}
251+
252+
// atomic_and
253+
254+
static inline int _vx_atomic_and_asm(volatile void *addr, int value)
255+
{
256+
int old_value;
257+
__asm__ volatile(
258+
"amoand.w %0, %2, (%1)"
259+
: "=r"(old_value)
260+
: "r"(addr), "r"(value)
261+
: "memory");
262+
return old_value;
263+
}
264+
265+
// Mangled symbol for _cl_atomic_and(volatile CLlocal int *, int).
// Forwards to _vx_atomic_and_asm and returns its result unchanged.
int _Z14_cl_atomic_andPU7CLlocalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_and_asm(ptr, val);
  return previous;
}
269+
270+
// Mangled symbol for _cl_atomic_and(volatile CLglobal int *, int).
// Forwards to _vx_atomic_and_asm and returns its result unchanged.
int _Z14_cl_atomic_andPU8CLglobalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_and_asm(ptr, val);
  return previous;
}
274+
275+
// Mangled symbol for _cl_atomic_and(volatile CLlocal unsigned int *,
// unsigned int). The mask is cast to int for _vx_atomic_and_asm and the
// helper's int result is converted back to unsigned int.
unsigned int _Z14_cl_atomic_andPU7CLlocalVjj(volatile void *ptr, unsigned int val)
{
  const int mask = (int)val;
  return (unsigned int)_vx_atomic_and_asm(ptr, mask);
}
279+
280+
// Mangled symbol for _cl_atomic_and(volatile CLglobal unsigned int *,
// unsigned int). The mask is cast to int for _vx_atomic_and_asm and the
// helper's int result is converted back to unsigned int.
unsigned int _Z14_cl_atomic_andPU8CLglobalVjj(volatile void *ptr, unsigned int val)
{
  const int mask = (int)val;
  return (unsigned int)_vx_atomic_and_asm(ptr, mask);
}
284+
285+
// atomic_xchg
286+
287+
static inline int _vx_atomic_xchg_asm(volatile void *addr, int value)
288+
{
289+
int old_value;
290+
__asm__ volatile(
291+
"amoswap.w %0, %2, (%1)"
292+
: "=r"(old_value)
293+
: "r"(addr), "r"(value)
294+
: "memory");
295+
return old_value;
296+
}
297+
// Mangled symbol for _cl_atomic_xchg(volatile CLglobal int *, int).
// Forwards to _vx_atomic_xchg_asm and returns its result unchanged.
int _Z15_cl_atomic_xchgPU8CLglobalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_xchg_asm(ptr, val);
  return previous;
}
301+
302+
// Mangled symbol for _cl_atomic_xchg(volatile CLlocal int *, int).
// Forwards to _vx_atomic_xchg_asm and returns its result unchanged.
int _Z15_cl_atomic_xchgPU7CLlocalVii(volatile void *ptr, int val)
{
  const int previous = _vx_atomic_xchg_asm(ptr, val);
  return previous;
}
306+
307+
// Mangled symbol for _cl_atomic_xchg(volatile CLglobal unsigned int *,
// unsigned int). The replacement value is cast to int for
// _vx_atomic_xchg_asm and the helper's int result is converted back to
// unsigned int.
unsigned int _Z15_cl_atomic_xchgPU8CLglobalVjj(volatile void *ptr, unsigned int val)
{
  const int replacement = (int)val;
  return (unsigned int)_vx_atomic_xchg_asm(ptr, replacement);
}
311+
312+
// Mangled symbol for _cl_atomic_xchg(volatile CLlocal unsigned int *,
// unsigned int). The replacement value is cast to int for
// _vx_atomic_xchg_asm and the helper's int result is converted back to
// unsigned int.
unsigned int _Z15_cl_atomic_xchgPU7CLlocalVjj(volatile void *ptr, unsigned int val)
{
  const int replacement = (int)val;
  return (unsigned int)_vx_atomic_xchg_asm(ptr, replacement);
}
316+
317+
// atomic_cmpxchg
318+
319+
static inline int _vx_atomic_cmpxchg_asm(volatile void *addr, int cmp_val, int new_val)
320+
{
321+
int old_val;
322+
int success;
323+
__asm__ volatile(
324+
"1: lr.w %0, (%2)\n"
325+
" bne %0, %3, 2f\n"
326+
" sc.w %1, %4, (%2)\n"
327+
" bnez %1, 1b\n"
328+
"2:"
329+
: "=&r"(old_val), "=&r"(success)
330+
: "r"(addr), "r"(cmp_val), "r"(new_val)
331+
: "memory");
332+
return old_val;
333+
}
334+
335+
// Mangled symbol for _cl_atomic_cmpxchg(volatile CLlocal int *, int, int).
// Forwards to _vx_atomic_cmpxchg_asm and returns its result unchanged.
int _Z18_cl_atomic_cmpxchgPU7CLlocalViii(volatile void *ptr, int cmp_val, int new_val)
{
  const int observed = _vx_atomic_cmpxchg_asm(ptr, cmp_val, new_val);
  return observed;
}
339+
340+
// Mangled symbol for _cl_atomic_cmpxchg(volatile CLglobal int *, int, int).
// Forwards to _vx_atomic_cmpxchg_asm and returns its result unchanged.
int _Z18_cl_atomic_cmpxchgPU8CLglobalViii(volatile void *ptr, int cmp_val, int new_val)
{
  const int observed = _vx_atomic_cmpxchg_asm(ptr, cmp_val, new_val);
  return observed;
}
344+
345+
// Mangled symbol for _cl_atomic_cmpxchg(volatile CLlocal unsigned int *,
// unsigned int, unsigned int). Both operands are cast to int for
// _vx_atomic_cmpxchg_asm and the helper's int result is converted back to
// unsigned int.
unsigned int _Z18_cl_atomic_cmpxchgPU7CLlocalVjjj(volatile void *ptr, unsigned int cmp_val, unsigned int new_val)
{
  const int expected = (int)cmp_val;
  const int desired = (int)new_val;
  return (unsigned int)_vx_atomic_cmpxchg_asm(ptr, expected, desired);
}
349+
350+
// Mangled symbol for _cl_atomic_cmpxchg(volatile CLglobal unsigned int *,
// unsigned int, unsigned int). Both operands are cast to int for
// _vx_atomic_cmpxchg_asm and the helper's int result is converted back to
// unsigned int.
unsigned int _Z18_cl_atomic_cmpxchgPU8CLglobalVjjj(volatile void *ptr, unsigned int cmp_val, unsigned int new_val)
{
  const int expected = (int)cmp_val;
  const int desired = (int)new_val;
  return (unsigned int)_vx_atomic_cmpxchg_asm(ptr, expected, desired);
}

0 commit comments

Comments
 (0)