Skip to content

Commit 366a996

Browse files
authored
Merge pull request #239 from bonachea/amo
Implement PRIF Atomic Memory Operations
2 parents 524eb6e + 5220ef1 commit 366a996

File tree

8 files changed

+729
-143
lines changed

8 files changed

+729
-143
lines changed

docs/implementation-status.md

Lines changed: 28 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -198,39 +198,36 @@ are accepted, but in some cases, the associated runtime behavior is not fully im
198198
---
199199

200200
## Atomic Memory Operations
201-
### Support = no
202201

203-
<!---
204202
| Procedure | Status | Notes |
205203
|-----------|--------|-------|
206-
| `prif_atomic_add` | no | |
207-
| `prif_atomic_add_indirect` | no | |
208-
| `prif_atomic_and` | no | |
209-
| `prif_atomic_and_indirect` | no | |
210-
| `prif_atomic_or` | no | |
211-
| `prif_atomic_or_indirect` | no | |
212-
| `prif_atomic_xor` | no | |
213-
| `prif_atomic_xor_indirect` | no | |
214-
| `prif_atomic_cas_int` | no | |
215-
| `prif_atomic_cas_int_indirect` | no | |
216-
| `prif_atomic_cas_logical` | no | |
217-
| `prif_atomic_cas_logical_indirect` | no | |
218-
| `prif_atomic_fetch_add` | no | |
219-
| `prif_atomic_fetch_add_indirect` | no | |
220-
| `prif_atomic_fetch_and` | no | |
221-
| `prif_atomic_fetch_and_indirect` | no | |
222-
| `prif_atomic_fetch_or` | no | |
223-
| `prif_atomic_fetch_or_indirect` | no | |
224-
| `prif_atomic_fetch_xor` | no | |
225-
| `prif_atomic_fetch_xor_indirect` | no | |
226-
| `prif_atomic_define_int` | no | |
227-
| `prif_atomic_define_int_indirect` | no | |
228-
| `prif_atomic_define_logical` | no | |
229-
| `prif_atomic_define_logical_indirect` | no | |
230-
| `prif_atomic_ref_int` | no | |
231-
| `prif_atomic_ref_int_indirect` | no | |
232-
| `prif_atomic_ref_logical` | no | |
233-
| `prif_atomic_ref_logical_indirect` | no | |
234-
-->
204+
| `prif_atomic_add` | **YES** | |
205+
| `prif_atomic_add_indirect` | **YES** | |
206+
| `prif_atomic_and` | **YES** | |
207+
| `prif_atomic_and_indirect` | **YES** | |
208+
| `prif_atomic_or` | **YES** | |
209+
| `prif_atomic_or_indirect` | **YES** | |
210+
| `prif_atomic_xor` | **YES** | |
211+
| `prif_atomic_xor_indirect` | **YES** | |
212+
| `prif_atomic_cas_int` | **YES** | |
213+
| `prif_atomic_cas_int_indirect` | **YES** | |
214+
| `prif_atomic_cas_logical` | **YES** | |
215+
| `prif_atomic_cas_logical_indirect` | **YES** | |
216+
| `prif_atomic_fetch_add` | **YES** | |
217+
| `prif_atomic_fetch_add_indirect` | **YES** | |
218+
| `prif_atomic_fetch_and` | **YES** | |
219+
| `prif_atomic_fetch_and_indirect` | **YES** | |
220+
| `prif_atomic_fetch_or` | **YES** | |
221+
| `prif_atomic_fetch_or_indirect` | **YES** | |
222+
| `prif_atomic_fetch_xor` | **YES** | |
223+
| `prif_atomic_fetch_xor_indirect` | **YES** | |
224+
| `prif_atomic_define_int` | **YES** | |
225+
| `prif_atomic_define_int_indirect` | **YES** | |
226+
| `prif_atomic_define_logical` | **YES** | |
227+
| `prif_atomic_define_logical_indirect` | **YES** | |
228+
| `prif_atomic_ref_int` | **YES** | |
229+
| `prif_atomic_ref_int_indirect` | **YES** | |
230+
| `prif_atomic_ref_logical` | **YES** | |
231+
| `prif_atomic_ref_logical_indirect` | **YES** | |
235232

236233
---

src/caffeine/atomic_s.F90

Lines changed: 67 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -1,122 +1,78 @@
11
! Copyright (c), The Regents of the University of California
22
! Terms of use are as specified in LICENSE.txt
3+
4+
#include "assert_macros.h"
5+
#include "caffeine-internal.h"
6+
37
submodule(prif:prif_private_s) atomic_s
48
! DO NOT ADD USE STATEMENTS HERE
59
! All use statements belong in prif_private_s.F90
610
implicit none
711

12+
! placeholder variables that simplify the macro logic below
13+
integer(PRIF_ATOMIC_INT_KIND) :: out_placeholder_int
14+
logical(PRIF_ATOMIC_LOGICAL_KIND) :: out_placeholder_logical
15+
integer(PRIF_ATOMIC_INT_KIND), parameter :: in_placeholder_int = 0
16+
logical(PRIF_ATOMIC_LOGICAL_KIND), parameter :: in_placeholder_logical = .false.
817
contains
918

10-
module procedure prif_atomic_add
11-
call unimplemented("prif_atomic_add")
12-
end procedure
13-
14-
module procedure prif_atomic_add_indirect
15-
call unimplemented("prif_atomic_add_indirect")
16-
end procedure
17-
18-
module procedure prif_atomic_and
19-
call unimplemented("prif_atomic_and")
20-
end procedure
21-
22-
module procedure prif_atomic_and_indirect
23-
call unimplemented("prif_atomic_and_indirect")
24-
end procedure
25-
26-
module procedure prif_atomic_or
27-
call unimplemented("prif_atomic_or")
28-
end procedure
29-
30-
module procedure prif_atomic_or_indirect
31-
call unimplemented("prif_atomic_or_indirect")
32-
end procedure
33-
34-
module procedure prif_atomic_xor
35-
call unimplemented("prif_atomic_xor")
36-
end procedure
37-
38-
module procedure prif_atomic_xor_indirect
39-
call unimplemented("prif_atomic_xor_indirect")
40-
end procedure
41-
42-
module procedure prif_atomic_cas_int
43-
call unimplemented("prif_atomic_cas_int")
44-
end procedure
45-
46-
module procedure prif_atomic_cas_int_indirect
47-
call unimplemented("prif_atomic_cas_int_indirect")
48-
end procedure
49-
50-
module procedure prif_atomic_cas_logical
51-
call unimplemented("prif_atomic_cas_logical")
52-
end procedure
53-
54-
module procedure prif_atomic_cas_logical_indirect
55-
call unimplemented("prif_atomic_cas_logical_indirect")
56-
end procedure
57-
58-
module procedure prif_atomic_fetch_add
59-
call unimplemented("prif_atomic_fetch_add")
60-
end procedure
61-
62-
module procedure prif_atomic_fetch_add_indirect
63-
call unimplemented("prif_atomic_fetch_add_indirect")
64-
end procedure
65-
66-
module procedure prif_atomic_fetch_and
67-
call unimplemented("prif_atomic_fetch_and")
68-
end procedure
69-
70-
module procedure prif_atomic_fetch_and_indirect
71-
call unimplemented("prif_atomic_fetch_and_indirect")
72-
end procedure
73-
74-
module procedure prif_atomic_fetch_or
75-
call unimplemented("prif_atomic_fetch_or")
76-
end procedure
77-
78-
module procedure prif_atomic_fetch_or_indirect
79-
call unimplemented("prif_atomic_fetch_or_indirect")
80-
end procedure
81-
82-
module procedure prif_atomic_fetch_xor
83-
call unimplemented("prif_atomic_fetch_xor")
84-
end procedure
85-
86-
module procedure prif_atomic_fetch_xor_indirect
87-
call unimplemented("prif_atomic_fetch_xor_indirect")
88-
end procedure
89-
90-
module procedure prif_atomic_define_int
91-
call unimplemented("prif_atomic_define_int")
92-
end procedure
93-
94-
module procedure prif_atomic_define_int_indirect
95-
call unimplemented("prif_atomic_define_int_indirect")
96-
end procedure
97-
98-
module procedure prif_atomic_define_logical
99-
call unimplemented("prif_atomic_define_logical")
100-
end procedure
101-
102-
module procedure prif_atomic_define_logical_indirect
103-
call unimplemented("prif_atomic_define_logical_indirect")
104-
end procedure
105-
106-
module procedure prif_atomic_ref_int
107-
call unimplemented("prif_atomic_ref_int")
108-
end procedure
109-
110-
module procedure prif_atomic_ref_int_indirect
111-
call unimplemented("prif_atomic_ref_int_indirect")
112-
end procedure
113-
114-
module procedure prif_atomic_ref_logical
115-
call unimplemented("prif_atomic_ref_logical")
116-
end procedure
117-
118-
module procedure prif_atomic_ref_logical_indirect
119-
call unimplemented("prif_atomic_ref_logical_indirect")
120-
end procedure
19+
#define ATOMIC_OP(OPNAME, OPCODE, caf_op) \
20+
module procedure CAF_CONCAT2(prif_atomic_,OPNAME) ; \
21+
integer(c_intptr_t) :: remote_base; \
22+
call_assert(offset >= 0); \
23+
call base_pointer(coarray_handle, image_num, remote_base); \
24+
call CAF_CONCAT3(prif_atomic_,OPNAME,_indirect) \
25+
( image_num, remote_base + offset, OPPASSF, stat ); \
26+
end procedure ; \
27+
module procedure CAF_CONCAT3(prif_atomic_,OPNAME,_indirect) ; \
28+
call_assert(c_sizeof(out_placeholder_int) == 8); call_assert(c_sizeof(out_placeholder_logical) == 8); \
29+
call_assert_describe(image_num > 0 .and. image_num <= initial_team%num_images, "image_num not within valid range"); \
30+
call caf_op(CAF_CONCAT2(CAF_OP_,OPCODE), image_num, atom_remote_ptr, OPPASSC); \
31+
if (present(stat)) stat = 0; \
32+
end procedure
33+
34+
! Extra arg below is another workaround gfortran's sub-standard preprocessor
35+
#define ATOMIC_INT_OP(OPNAME,_,OPCODE) ATOMIC_OP(OPNAME,OPCODE,caf_atomic_int)
36+
#define ATOMIC_LOG_OP(OPNAME,_,OPCODE) ATOMIC_OP(OPNAME,OPCODE,caf_atomic_logical)
37+
38+
! OPPASSF defines the dummy argument pass-thru in Fortran,
39+
! from the direct module procedure to the indirect variant
40+
#undef OPPASSF
41+
#define OPPASSF value
42+
! OPPASSC defines the dummy argument pass-thru to C,
43+
! from the indirect module procedure to the BIND(C) call
44+
#undef OPPASSC
45+
#define OPPASSC value, in_placeholder_int, in_placeholder_int
46+
ATOMIC_INT_OP(ref_int, ,GET)
47+
#undef OPPASSC
48+
#define OPPASSC value, in_placeholder_logical, in_placeholder_logical
49+
ATOMIC_LOG_OP(ref_logical, ,GET)
50+
#undef OPPASSC
51+
#define OPPASSC out_placeholder_logical, value, in_placeholder_logical
52+
ATOMIC_LOG_OP(define_logical, ,SET)
53+
#undef OPPASSC
54+
#define OPPASSC out_placeholder_int, value, in_placeholder_int
55+
ATOMIC_INT_OP(define_int, ,SET)
56+
57+
ATOMIC_INT_OP(add, ,ADD)
58+
ATOMIC_INT_OP(and, ,AND)
59+
ATOMIC_INT_OP(or, ,OR)
60+
ATOMIC_INT_OP(xor, ,XOR)
61+
62+
#undef OPPASSF
63+
#define OPPASSF value, old
64+
#undef OPPASSC
65+
#define OPPASSC old, value, in_placeholder_int
66+
ATOMIC_INT_OP(fetch_add, ,FADD)
67+
ATOMIC_INT_OP(fetch_and, ,FAND)
68+
ATOMIC_INT_OP(fetch_or, ,FOR)
69+
ATOMIC_INT_OP(fetch_xor, ,FXOR)
70+
71+
#undef OPPASSF
72+
#define OPPASSF old, compare, new
73+
#undef OPPASSC
74+
#define OPPASSC old, compare, new
75+
ATOMIC_INT_OP(cas_int, ,FCAS)
76+
ATOMIC_LOG_OP(cas_logical, ,FCAS)
12177

12278
end submodule atomic_s

src/caffeine/caffeine-internal.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# /* Copyright (c), The Regents of the University of California */
2+
# /* Terms of use are as specified in LICENSE.txt */
3+
4+
# /* NOTE: this is a dual-language header file, */
5+
# /* and should ONLY contain portable preprocessor directives. */
6+
7+
# /* define some macro portability helpers */
8+
#if defined(__GFORTRAN__) || defined(_CRAYFTN) || defined(NAGFOR)
9+
# define CAF_CONCAT2(x,y) x/**/y
10+
# define CAF_CONCAT3(x,y,z) x/**/y/**/z
11+
# define CAF_STRINGIFY_HELPER(x) "x"
12+
#else
13+
# define CAF_CONCAT2(x,y) x##y
14+
# define CAF_CONCAT3(x,y,z) x##y##z
15+
# define CAF_STRINGIFY_HELPER(x) #x
16+
#endif
17+
#define CAF_STRINGIFY(x) CAF_STRINGIFY_HELPER(x)
18+
19+
# /* AMO support defines */
20+
21+
#define CAF_OP_GET 0
22+
#define CAF_OP_SET 1
23+
#define CAF_OP_ADD 2
24+
#define CAF_OP_AND 3
25+
#define CAF_OP_OR 4
26+
#define CAF_OP_XOR 5
27+
#define CAF_OP_FADD 6
28+
#define CAF_OP_FAND 7
29+
#define CAF_OP_FOR 8
30+
#define CAF_OP_FXOR 9
31+
#define CAF_OP_FCAS 10
32+

src/caffeine/caffeine.c

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <ISO_Fortran_binding.h>
1616
#include "../dlmalloc/dl_malloc_caf.h"
1717
#include "../dlmalloc/dl_malloc.h"
18+
#include "caffeine-internal.h"
1819

1920
enum {
2021
UNRECOGNIZED_TYPE,
@@ -31,6 +32,7 @@ typedef void(*final_func_ptr)(void*, size_t) ;
3132
typedef uint8_t byte;
3233

3334
static void event_init(void);
35+
static void atomic_init(void);
3436

3537
// ---------------------------------------------------
3638
int caf_this_image(gex_TM_t tm) {
@@ -107,6 +109,8 @@ void caf_caffeinate(
107109
*non_symmetric_heap = create_mspace_with_base((void*)non_symmetric_heap_start, non_symmetric_heap_size, 0);
108110
mspace_set_footprint_limit(*non_symmetric_heap, non_symmetric_heap_size);
109111

112+
// init various subsystems:
113+
atomic_init();
110114
event_init();
111115
}
112116

@@ -306,6 +310,65 @@ void caf_event_wait(void *event_var_ptr, int64_t threshold, int segment_boundary
306310
assert(cnt >= threshold);
307311
}
308312

313+
// _______________________ Atomics ____________________________
314+
315+
#define OPMAP(name) [CAF_CONCAT2(CAF_OP_,name)] = CAF_CONCAT2(GEX_OP_,name)
316+
317+
static gex_OP_t const op_map[] = {
318+
OPMAP(GET),
319+
OPMAP(SET),
320+
OPMAP(ADD),
321+
OPMAP(AND),
322+
OPMAP(OR),
323+
OPMAP(XOR),
324+
OPMAP(FADD),
325+
OPMAP(FAND),
326+
OPMAP(FOR),
327+
OPMAP(FXOR),
328+
OPMAP(FCAS),
329+
};
330+
static gex_AD_t atomic_AD = GEX_AD_INVALID;
331+
332+
static void atomic_init(void) {
333+
assert(atomic_AD == GEX_AD_INVALID);
334+
335+
// create the atomic AD
336+
gex_AD_Create(&atomic_AD, myworldteam, GEX_DT_I64,
337+
GEX_OP_GET | GEX_OP_SET |
338+
GEX_OP_ADD | GEX_OP_FADD |
339+
GEX_OP_AND | GEX_OP_FAND |
340+
GEX_OP_OR | GEX_OP_FOR |
341+
GEX_OP_XOR | GEX_OP_FXOR |
342+
GEX_OP_FCAS,
343+
0); // TODO: allow user control over GEX_FLAG_AD_FAVOR_* flags?
344+
345+
assert(atomic_AD != GEX_AD_INVALID);
346+
}
347+
348+
void caf_atomic_int(int opcode, int image, void* addr, int64_t *result, int64_t op1, int64_t op2) {
349+
assert(atomic_AD != GEX_AD_INVALID);
350+
assert(addr);
351+
assert(opcode >= 0 && opcode < sizeof(op_map)/sizeof(op_map[0]));
352+
353+
gex_OP_t op = op_map[opcode];
354+
gex_Event_Wait(
355+
gex_AD_OpNB_I64(atomic_AD, result,
356+
image-1, addr,
357+
op, op1, op2,
358+
GEX_FLAG_RANK_IS_JOBRANK)
359+
);
360+
// DOB: We could very easily insert memory fencing into the AMO operation above
361+
// via GEX_FLAG_AD_ACQ | GEX_FLAG_AD_REL, incurring an associated performance penalty
362+
// (most notably for same-node images communicating via shared-memory transport).
363+
// However based on my reading of the informal hand-waving in F23 C.12.1 "Atomic memory consistency",
364+
// such fencing is neither required nor guaranteed by the language.
365+
// As such we leave the AMO unfenced and rely on the fences at surrounding
366+
// memory segment boundaries to provide the required ordering semantics.
367+
}
368+
369+
void caf_atomic_logical(int opcode, int image, void* addr, int64_t *result, int64_t op1, int64_t op2) {
370+
caf_atomic_int(opcode, image, addr, result, op1, op2);
371+
}
309372

310373
//-------------------------------------------------------------------
311374

0 commit comments

Comments
 (0)