Skip to content

Commit f77fd3e

Browse files
committed
Implement PRIF Atomics
All AMOs are funneled down to one GASNet-EX remote atomic domain for 64-bit integers to ensure AMO coherence. New cross-language header file caffeine-internal.h ensures consistent operation numbering between the Fortran and C code sides of Caffeine, and a static lookup table is used to convert that operation number to the corresponding gex_OP_t value. Fortran-side implementation of the PRIF subroutines is heavily macroized, factoring the logic to ensure consistency across operations and variants. This commit is a great example of why Fortran needs a standardized preprocessor; thanks to function-like macros, the real working implementation in src/caffeine/atomic_s.F90 is shorter than the unimplemented stubs it replaces!
1 parent 524eb6e commit f77fd3e

File tree

5 files changed

+185
-112
lines changed

5 files changed

+185
-112
lines changed

src/caffeine/atomic_s.F90

Lines changed: 63 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -1,122 +1,74 @@
11
! Copyright (c), The Regents of the University of California
22
! Terms of use are as specified in LICENSE.txt
3+
4+
#include "assert_macros.h"
5+
#include "caffeine-internal.h"
6+
37
submodule(prif:prif_private_s) atomic_s
48
! DO NOT ADD USE STATEMENTS HERE
59
! All use statements belong in prif_private_s.F90
610
implicit none
711

12+
! placeholder variables that simplify the macro logic below
13+
integer(PRIF_ATOMIC_INT_KIND) :: dummyti
14+
logical(PRIF_ATOMIC_LOGICAL_KIND) :: dummytl
15+
integer(PRIF_ATOMIC_INT_KIND), parameter :: dummyvi = 0
16+
logical(PRIF_ATOMIC_LOGICAL_KIND), parameter :: dummyvl = .false.
817
contains
918

10-
module procedure prif_atomic_add
11-
call unimplemented("prif_atomic_add")
12-
end procedure
13-
14-
module procedure prif_atomic_add_indirect
15-
call unimplemented("prif_atomic_add_indirect")
16-
end procedure
17-
18-
module procedure prif_atomic_and
19-
call unimplemented("prif_atomic_and")
20-
end procedure
21-
22-
module procedure prif_atomic_and_indirect
23-
call unimplemented("prif_atomic_and_indirect")
24-
end procedure
25-
26-
module procedure prif_atomic_or
27-
call unimplemented("prif_atomic_or")
28-
end procedure
29-
30-
module procedure prif_atomic_or_indirect
31-
call unimplemented("prif_atomic_or_indirect")
32-
end procedure
33-
34-
module procedure prif_atomic_xor
35-
call unimplemented("prif_atomic_xor")
36-
end procedure
37-
38-
module procedure prif_atomic_xor_indirect
39-
call unimplemented("prif_atomic_xor_indirect")
40-
end procedure
41-
42-
module procedure prif_atomic_cas_int
43-
call unimplemented("prif_atomic_cas_int")
44-
end procedure
45-
46-
module procedure prif_atomic_cas_int_indirect
47-
call unimplemented("prif_atomic_cas_int_indirect")
48-
end procedure
49-
50-
module procedure prif_atomic_cas_logical
51-
call unimplemented("prif_atomic_cas_logical")
52-
end procedure
53-
54-
module procedure prif_atomic_cas_logical_indirect
55-
call unimplemented("prif_atomic_cas_logical_indirect")
56-
end procedure
57-
58-
module procedure prif_atomic_fetch_add
59-
call unimplemented("prif_atomic_fetch_add")
60-
end procedure
61-
62-
module procedure prif_atomic_fetch_add_indirect
63-
call unimplemented("prif_atomic_fetch_add_indirect")
64-
end procedure
65-
66-
module procedure prif_atomic_fetch_and
67-
call unimplemented("prif_atomic_fetch_and")
68-
end procedure
69-
70-
module procedure prif_atomic_fetch_and_indirect
71-
call unimplemented("prif_atomic_fetch_and_indirect")
72-
end procedure
73-
74-
module procedure prif_atomic_fetch_or
75-
call unimplemented("prif_atomic_fetch_or")
76-
end procedure
77-
78-
module procedure prif_atomic_fetch_or_indirect
79-
call unimplemented("prif_atomic_fetch_or_indirect")
80-
end procedure
81-
82-
module procedure prif_atomic_fetch_xor
83-
call unimplemented("prif_atomic_fetch_xor")
84-
end procedure
85-
86-
module procedure prif_atomic_fetch_xor_indirect
87-
call unimplemented("prif_atomic_fetch_xor_indirect")
88-
end procedure
89-
90-
module procedure prif_atomic_define_int
91-
call unimplemented("prif_atomic_define_int")
92-
end procedure
93-
94-
module procedure prif_atomic_define_int_indirect
95-
call unimplemented("prif_atomic_define_int_indirect")
96-
end procedure
97-
98-
module procedure prif_atomic_define_logical
99-
call unimplemented("prif_atomic_define_logical")
100-
end procedure
101-
102-
module procedure prif_atomic_define_logical_indirect
103-
call unimplemented("prif_atomic_define_logical_indirect")
104-
end procedure
105-
106-
module procedure prif_atomic_ref_int
107-
call unimplemented("prif_atomic_ref_int")
108-
end procedure
109-
110-
module procedure prif_atomic_ref_int_indirect
111-
call unimplemented("prif_atomic_ref_int_indirect")
112-
end procedure
113-
114-
module procedure prif_atomic_ref_logical
115-
call unimplemented("prif_atomic_ref_logical")
116-
end procedure
117-
118-
module procedure prif_atomic_ref_logical_indirect
119-
call unimplemented("prif_atomic_ref_logical_indirect")
120-
end procedure
19+
#define ATOMIC_OP(OPNAME, OPCODE, caf_op) \
20+
module procedure CAF_CONCAT2(prif_atomic_,OPNAME) ; \
21+
integer(c_intptr_t) :: remote_base; \
22+
call_assert(offset >= 0); \
23+
call base_pointer(coarray_handle, image_num, remote_base); \
24+
call CAF_CONCAT3(prif_atomic_,OPNAME,_indirect) \
25+
( image_num, remote_base + offset, OPPASSI, stat ); \
26+
end procedure ; \
27+
module procedure CAF_CONCAT3(prif_atomic_,OPNAME,_indirect) ; \
28+
call_assert(c_sizeof(dummyti) == 8); call_assert(c_sizeof(dummytl) == 8); \
29+
call_assert_describe(image_num > 0 .and. image_num <= initial_team%num_images, "image_num not within valid range"); \
30+
call caf_op(CAF_CONCAT2(CAF_OP_,OPCODE), image_num, atom_remote_ptr, OPPASSC); \
31+
if (present(stat)) stat = 0; \
32+
end procedure
33+
34+
! Extra arg below is another workaround gfortran's sub-standard preprocessor
35+
#define ATOMIC_INT_OP(OPNAME,_,OPCODE) ATOMIC_OP(OPNAME,OPCODE,caf_atomic_int)
36+
#define ATOMIC_LOG_OP(OPNAME,_,OPCODE) ATOMIC_OP(OPNAME,OPCODE,caf_atomic_logical)
37+
38+
#undef OPPASSI
39+
#define OPPASSI value
40+
#undef OPPASSC
41+
#define OPPASSC value, dummyvi, dummyvi
42+
ATOMIC_INT_OP(ref_int, ,GET)
43+
#undef OPPASSC
44+
#define OPPASSC value, dummyvl, dummyvl
45+
ATOMIC_LOG_OP(ref_logical, ,GET)
46+
#undef OPPASSC
47+
#define OPPASSC dummytl, value, dummyvl
48+
ATOMIC_LOG_OP(define_logical, ,SET)
49+
#undef OPPASSC
50+
#define OPPASSC dummyti, value, dummyvi
51+
ATOMIC_INT_OP(define_int, ,SET)
52+
53+
ATOMIC_INT_OP(add, ,ADD)
54+
ATOMIC_INT_OP(and, ,AND)
55+
ATOMIC_INT_OP(or, ,OR)
56+
ATOMIC_INT_OP(xor, ,XOR)
57+
58+
#undef OPPASSI
59+
#define OPPASSI value, old
60+
#undef OPPASSC
61+
#define OPPASSC old, value, dummyvi
62+
ATOMIC_INT_OP(fetch_add, ,FADD)
63+
ATOMIC_INT_OP(fetch_and, ,FAND)
64+
ATOMIC_INT_OP(fetch_or, ,FOR)
65+
ATOMIC_INT_OP(fetch_xor, ,FXOR)
66+
67+
#undef OPPASSI
68+
#define OPPASSI old, compare, new
69+
#undef OPPASSC
70+
#define OPPASSC old, compare, new
71+
ATOMIC_INT_OP(cas_int, ,FCAS)
72+
ATOMIC_LOG_OP(cas_logical, ,FCAS)
12173

12274
end submodule atomic_s

src/caffeine/caffeine-internal.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# /* Copyright (c), The Regents of the University of California */
2+
# /* Terms of use are as specified in LICENSE.txt */
3+
4+
# /* NOTE: this is a dual-language header file, */
5+
# /* and should ONLY contain portable preprocessor directives. */
6+
7+
# /* define some macro portability helpers */
8+
#if defined(__GFORTRAN__) || defined(_CRAYFTN) || defined(NAGFOR)
9+
# define CAF_CONCAT2(x,y) x/**/y
10+
# define CAF_CONCAT3(x,y,z) x/**/y/**/z
11+
# define CAF_STRINGIFY_HELPER(x) "x"
12+
#else
13+
# define CAF_CONCAT2(x,y) x##y
14+
# define CAF_CONCAT3(x,y,z) x##y##z
15+
# define CAF_STRINGIFY_HELPER(x) #x
16+
#endif
17+
#define CAF_STRINGIFY(x) CAF_STRINGIFY_HELPER(x)
18+
19+
# /* AMO support defines */
20+
21+
#define CAF_OP_GET 0
22+
#define CAF_OP_SET 1
23+
#define CAF_OP_ADD 2
24+
#define CAF_OP_AND 3
25+
#define CAF_OP_OR 4
26+
#define CAF_OP_XOR 5
27+
#define CAF_OP_FADD 6
28+
#define CAF_OP_FAND 7
29+
#define CAF_OP_FOR 8
30+
#define CAF_OP_FXOR 9
31+
#define CAF_OP_FCAS 10
32+

src/caffeine/caffeine.c

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <ISO_Fortran_binding.h>
1616
#include "../dlmalloc/dl_malloc_caf.h"
1717
#include "../dlmalloc/dl_malloc.h"
18+
#include "caffeine-internal.h"
1819

1920
enum {
2021
UNRECOGNIZED_TYPE,
@@ -31,6 +32,7 @@ typedef void(*final_func_ptr)(void*, size_t) ;
3132
typedef uint8_t byte;
3233

3334
static void event_init(void);
35+
static void atomic_init(void);
3436

3537
// ---------------------------------------------------
3638
int caf_this_image(gex_TM_t tm) {
@@ -107,6 +109,8 @@ void caf_caffeinate(
107109
*non_symmetric_heap = create_mspace_with_base((void*)non_symmetric_heap_start, non_symmetric_heap_size, 0);
108110
mspace_set_footprint_limit(*non_symmetric_heap, non_symmetric_heap_size);
109111

112+
// init various subsystems:
113+
atomic_init();
110114
event_init();
111115
}
112116

@@ -306,6 +310,65 @@ void caf_event_wait(void *event_var_ptr, int64_t threshold, int segment_boundary
306310
assert(cnt >= threshold);
307311
}
308312

313+
// _______________________ Atomics ____________________________
314+
315+
#define OPMAP(name) [CAF_CONCAT2(CAF_OP_,name)] = CAF_CONCAT2(GEX_OP_,name)
316+
317+
static gex_OP_t const op_map[] = {
318+
OPMAP(GET),
319+
OPMAP(SET),
320+
OPMAP(ADD),
321+
OPMAP(AND),
322+
OPMAP(OR),
323+
OPMAP(XOR),
324+
OPMAP(FADD),
325+
OPMAP(FAND),
326+
OPMAP(FOR),
327+
OPMAP(FXOR),
328+
OPMAP(FCAS),
329+
};
330+
static gex_AD_t atomic_AD = GEX_AD_INVALID;
331+
332+
static void atomic_init(void) {
333+
assert(atomic_AD == GEX_AD_INVALID);
334+
335+
// create the atomic AD
336+
gex_AD_Create(&atomic_AD, myworldteam, GEX_DT_I64,
337+
GEX_OP_GET | GEX_OP_SET |
338+
GEX_OP_ADD | GEX_OP_FADD |
339+
GEX_OP_AND | GEX_OP_FAND |
340+
GEX_OP_OR | GEX_OP_FOR |
341+
GEX_OP_XOR | GEX_OP_FXOR |
342+
GEX_OP_FCAS,
343+
0); // TODO: allow user control over GEX_FLAG_AD_FAVOR_* flags?
344+
345+
assert(atomic_AD != GEX_AD_INVALID);
346+
}
347+
348+
void caf_atomic_int(int opcode, int image, void* addr, int64_t *result, int64_t op1, int64_t op2) {
349+
assert(atomic_AD != GEX_AD_INVALID);
350+
assert(addr);
351+
assert(opcode >= 0 && opcode < sizeof(op_map)/sizeof(op_map[0]));
352+
353+
gex_OP_t op = op_map[opcode];
354+
gex_Event_Wait(
355+
gex_AD_OpNB_I64(atomic_AD, result,
356+
image-1, addr,
357+
op, op1, op2,
358+
GEX_FLAG_RANK_IS_JOBRANK)
359+
);
360+
// DOB: We could very easily insert memory fencing into the AMO operation above
361+
// via GEX_FLAG_AD_ACQ | GEX_FLAG_AD_REL, incurring an associated performance penalty
362+
// (most notably for same-node images communicating via shared-memory transport).
363+
// However based on my reading of the informal hand-waving in F23 C.12.1 "Atomic memory consistency",
364+
// such fencing is neither required nor guaranteed by the language.
365+
// As such we leave the AMO unfenced and rely on the fences at surrounding
366+
// memory segment boundaries to provide the required ordering semantics.
367+
}
368+
369+
void caf_atomic_logical(int opcode, int image, void* addr, int64_t *result, int64_t op1, int64_t op2) {
370+
caf_atomic_int(opcode, image, addr, result, op1, op2);
371+
}
309372

310373
//-------------------------------------------------------------------
311374

src/caffeine/prif_private_s.F90

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,32 @@ subroutine caf_event_query(event_var_ptr, count) bind(c)
243243
integer(c_int64_t), intent(out) :: count
244244
end subroutine
245245

246+
! _______________________ Atomics ____________________________
247+
248+
subroutine caf_atomic_int(opcode, image, addr, result, operand1, operand2) bind(c)
249+
!! void caf_atomic_int(int opcode, int image, void* addr, int64_t *result, int64_t op1, int64_t op2)
250+
import c_int, c_intptr_t, PRIF_ATOMIC_INT_KIND
251+
implicit none
252+
integer(c_int), intent(in), value :: opcode
253+
integer(c_int), intent(in), value :: image
254+
integer(c_intptr_t), intent(in), value :: addr
255+
integer(PRIF_ATOMIC_INT_KIND), intent(out) :: result
256+
integer(PRIF_ATOMIC_INT_KIND), intent(in), value :: operand1
257+
integer(PRIF_ATOMIC_INT_KIND), intent(in), value :: operand2
258+
end subroutine
259+
260+
subroutine caf_atomic_logical(opcode, image, addr, result, operand1, operand2) bind(c)
261+
!! void caf_atomic_logical(int opcode, int image, void* addr, int64_t *result, int64_t op1, int64_t op2)
262+
import c_int, c_intptr_t, PRIF_ATOMIC_LOGICAL_KIND
263+
implicit none
264+
integer(c_int), intent(in), value :: opcode
265+
integer(c_int), intent(in), value :: image
266+
integer(c_intptr_t), intent(in), value :: addr
267+
logical(PRIF_ATOMIC_LOGICAL_KIND), intent(out) :: result
268+
logical(PRIF_ATOMIC_LOGICAL_KIND), intent(in), value :: operand1
269+
logical(PRIF_ATOMIC_LOGICAL_KIND), intent(in), value :: operand2
270+
end subroutine
271+
246272
! ______________ Collective Subroutines __________________
247273

248274
subroutine caf_co_broadcast(a, source_image, Nelem, team) bind(C)

src/prif.F90

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ module prif
5252
integer(c_int), parameter, public :: PRIF_ATOMIC_INT_KIND = c_int64_t
5353

5454
#if HAVE_SELECTED_LOGICAL_KIND
55-
integer(c_int), parameter, public :: PRIF_ATOMIC_LOGICAL_KIND = selected_logical_kind(32)
55+
integer(c_int), parameter, public :: PRIF_ATOMIC_LOGICAL_KIND = selected_logical_kind(64)
5656
#else
5757
integer(c_int), parameter, public :: PRIF_ATOMIC_LOGICAL_KIND = PRIF_ATOMIC_INT_KIND
5858
#endif

0 commit comments

Comments
 (0)