Skip to content

Commit 9f43b23

Browse files
committed
Merge pull request #1710 from hjelmn/ugni_atomics
Additional ugni atomics
2 parents e968ddf + 28dfa36 commit 9f43b23

File tree

5 files changed

+175
-25
lines changed

5 files changed

+175
-25
lines changed

opal/mca/btl/base/btl_base_frame.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ mca_base_var_enum_value_flag_t mca_btl_base_atomic_enum_flags[] = {
6161
{MCA_BTL_ATOMIC_SUPPORTS_AND, "and", 0},
6262
{MCA_BTL_ATOMIC_SUPPORTS_OR, "or", 0},
6363
{MCA_BTL_ATOMIC_SUPPORTS_XOR, "xor", 0},
64+
{MCA_BTL_ATOMIC_SUPPORTS_LAND, "land", 0},
65+
{MCA_BTL_ATOMIC_SUPPORTS_LOR, "lor", 0},
66+
{MCA_BTL_ATOMIC_SUPPORTS_LXOR, "lxor", 0},
67+
{MCA_BTL_ATOMIC_SUPPORTS_SWAP, "swap", 0},
68+
{MCA_BTL_ATOMIC_SUPPORTS_MIN, "min", 0},
69+
{MCA_BTL_ATOMIC_SUPPORTS_MAX, "max", 0},
6470
{MCA_BTL_ATOMIC_SUPPORTS_CSWAP, "compare-and-swap", 0},
6571
{MCA_BTL_ATOMIC_SUPPORTS_GLOB, "global"},
6672
{0, NULL, 0}

opal/mca/btl/btl.h

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
13-
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
13+
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
1414
* reserved.
1515
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
1616
* Copyright (c) 2012-2013 NVIDIA Corporation. All rights reserved.
@@ -293,10 +293,44 @@ enum {
293293
MCA_BTL_ATOMIC_SUPPORTS_OR = 0x00000400,
294294
/** The btl supports atomic bitwise exclusive or */
295295
MCA_BTL_ATOMIC_SUPPORTS_XOR = 0x00000800,
296+
297+
/** The btl supports atomic logical and */
298+
MCA_BTL_ATOMIC_SUPPORTS_LAND = 0x00001000,
299+
/** The btl supports atomic logical or */
300+
MCA_BTL_ATOMIC_SUPPORTS_LOR = 0x00002000,
301+
/** The btl supports atomic logical exclusive or */
302+
MCA_BTL_ATOMIC_SUPPORTS_LXOR = 0x00004000,
303+
304+
/** The btl supports atomic swap */
305+
MCA_BTL_ATOMIC_SUPPORTS_SWAP = 0x00010000,
306+
307+
/** The btl supports atomic min */
308+
MCA_BTL_ATOMIC_SUPPORTS_MIN = 0x00100000,
309+
/** The btl supports atomic max */
310+
MCA_BTL_ATOMIC_SUPPORTS_MAX = 0x00200000,
311+
296312
/** The btl supports atomic compare-and-swap */
297313
MCA_BTL_ATOMIC_SUPPORTS_CSWAP = 0x10000000,
314+
298315
/** The btl guarantees global atomicity (can mix btl atomics with cpu atomics) */
299316
MCA_BTL_ATOMIC_SUPPORTS_GLOB = 0x20000000,
317+
318+
319+
/** The btl supports 32-bit integer operations. Keep in mind the btl may
320+
* support only a subset of the available atomics. */
321+
MCA_BTL_ATOMIC_SUPPORTS_32BIT = 0x40000000,
322+
323+
/** The btl supports floating-point operations. Keep in mind the btl may
324+
* support only a subset of the available atomics and may not support
325+
* both 64-bit and 32-bit floating point. */
326+
MCA_BTL_ATOMIC_SUPPORTS_FLOAT = 0x80000000,
327+
};
328+
329+
enum {
330+
/** Use 32-bit atomics */
331+
MCA_BTL_ATOMIC_FLAG_32BIT = 0x00000001,
332+
/** Use floating-point atomics */
333+
MCA_BTL_ATOMIC_FLAG_FLOAT = 0x00000002,
300334
};
301335

302336
enum mca_btl_base_atomic_op_t {
@@ -308,6 +342,20 @@ enum mca_btl_base_atomic_op_t {
308342
MCA_BTL_ATOMIC_OR = 0x0012,
309343
/** Atomic xor: (*remote_address) = (*remote_address) ^ operand */
310344
MCA_BTL_ATOMIC_XOR = 0x0014,
345+
/** Atomic logical and: (*remote_address) = (*remote_address) && operand */
346+
MCA_BTL_ATOMIC_LAND = 0x0015,
347+
/** Atomic logical or: (*remote_address) = (*remote_address) || operand */
348+
MCA_BTL_ATOMIC_LOR = 0x0016,
349+
/** Atomic logical xor: (*remote_address) = !(*remote_address) != !operand */
350+
MCA_BTL_ATOMIC_LXOR = 0x0017,
351+
/** Atomic swap: (*remote_address) = operand */
352+
MCA_BTL_ATOMIC_SWAP = 0x001a,
353+
/** Atomic min: (*remote_address) = min ((*remote_address), operand) */
354+
MCA_BTL_ATOMIC_MIN = 0x0020,
355+
/** Atomic max: (*remote_address) = max ((*remote_address), operand) */
356+
MCA_BTL_ATOMIC_MAX = 0x0021,
357+
358+
MCA_BTL_ATOMIC_LAST,
311359
};
312360
typedef enum mca_btl_base_atomic_op_t mca_btl_base_atomic_op_t;
313361

@@ -977,7 +1025,7 @@ typedef int (*mca_btl_base_module_get_fn_t) (struct mca_btl_base_module_t *btl,
9771025
* (remote_address, remote_address + 8)
9781026
* @param op (IN) Operation to perform
9791027
* @param operand (IN) Operand for the operation
980-
* @param flags (IN) Flags for this put operation
1028+
* @param flags (IN) Flags for this atomic operation
9811029
* @param order (IN) Ordering
9821030
* @param cbfunc (IN) Function to call on completion (if queued)
9831031
* @param cbcontext (IN) Context for the callback
@@ -1021,7 +1069,7 @@ typedef int (*mca_btl_base_module_atomic_op64_fn_t) (struct mca_btl_base_module_
10211069
* (remote_address, remote_address + 8)
10221070
* @param op (IN) Operation to perform
10231071
* @param operand (IN) Operand for the operation
1024-
* @param flags (IN) Flags for this put operation
1072+
* @param flags (IN) Flags for this atomic operation
10251073
* @param order (IN) Ordering
10261074
* @param cbfunc (IN) Function to call on completion (if queued)
10271075
* @param cbcontext (IN) Context for the callback
@@ -1067,7 +1115,7 @@ typedef int (*mca_btl_base_module_atomic_fop64_fn_t) (struct mca_btl_base_module
10671115
* (remote_address, remote_address + 8)
10681116
* @param compare (IN) Operand for the operation
10691117
* @param value (IN) Value to store on success
1070-
* @param flags (IN) Flags for this put operation
1118+
* @param flags (IN) Flags for this atomic operation
10711119
* @param order (IN) Ordering
10721120
* @param cbfunc (IN) Function to call on completion (if queued)
10731121
* @param cbcontext (IN) Context for the callback

opal/mca/btl/openib/btl_openib_atomic.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ int mca_btl_openib_atomic_fop (struct mca_btl_base_module_t *btl, struct mca_btl
112112
void *cbcontext, void *cbdata)
113113
{
114114

115-
if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op)) {
115+
if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op || (MCA_BTL_ATOMIC_FLAG_32BIT & flags))) {
116116
return OPAL_ERR_NOT_SUPPORTED;
117117
}
118118

@@ -128,6 +128,10 @@ int mca_btl_openib_atomic_cswap (struct mca_btl_base_module_t *btl, struct mca_b
128128
uint64_t value, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
129129
void *cbcontext, void *cbdata)
130130
{
131+
if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_FLAG_32BIT & flags)) {
132+
return OPAL_ERR_NOT_SUPPORTED;
133+
}
134+
131135
return mca_btl_openib_atomic_internal (btl, endpoint, local_address, remote_address, local_handle,
132136
remote_handle, IBV_WR_ATOMIC_CMP_AND_SWP, compare, value,
133137
flags, order, cbfunc, cbcontext, cbdata);

opal/mca/btl/ugni/btl_ugni_atomic.c

Lines changed: 105 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
3+
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
44
* reserved.
55
* $COPYRIGHT$
66
*
@@ -11,18 +11,66 @@
1111

1212
#include "btl_ugni_rdma.h"
1313

14-
static gni_fma_cmd_type_t famo_cmds[] = {
15-
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_FADD,
16-
[MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_FAND,
17-
[MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_FOR,
18-
[MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_FXOR,
14+
static gni_fma_cmd_type_t amo_cmds[][MCA_BTL_ATOMIC_LAST] = {
15+
[OPAL_INT32] = {
16+
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_IADD_S,
17+
[MCA_BTL_ATOMIC_LAND] = GNI_FMA_ATOMIC2_AND_S,
18+
[MCA_BTL_ATOMIC_LOR] = GNI_FMA_ATOMIC2_OR_S,
19+
[MCA_BTL_ATOMIC_LXOR] = GNI_FMA_ATOMIC2_XOR_S,
20+
[MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_SWAP_S,
21+
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_IMIN_S,
22+
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_IMAX_S,
23+
},
24+
[OPAL_INT64] = {
25+
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_ADD,
26+
[MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_AND,
27+
[MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_OR,
28+
[MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_XOR,
29+
[MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_SWAP,
30+
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_IMIN,
31+
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_IMAX,
32+
},
33+
[OPAL_FLOAT] = {
34+
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FPADD_S,
35+
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FPMIN_S,
36+
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FPMAX_S,
37+
},
38+
[OPAL_DOUBLE] = {
39+
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FPADD,
40+
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FPMIN,
41+
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FPMAX,
42+
},
1943
};
2044

21-
static gni_fma_cmd_type_t amo_cmds[] = {
22-
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_ADD,
23-
[MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_AND,
24-
[MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_OR,
25-
[MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_XOR,
45+
static gni_fma_cmd_type_t famo_cmds[][MCA_BTL_ATOMIC_LAST] = {
46+
[OPAL_INT32] = {
47+
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FIADD_S,
48+
[MCA_BTL_ATOMIC_LAND] = GNI_FMA_ATOMIC2_FAND_S,
49+
[MCA_BTL_ATOMIC_LOR] = GNI_FMA_ATOMIC2_FOR_S,
50+
[MCA_BTL_ATOMIC_LXOR] = GNI_FMA_ATOMIC2_FXOR_S,
51+
[MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_FSWAP_S,
52+
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FIMIN_S,
53+
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FIMAX_S,
54+
},
55+
[OPAL_INT64] = {
56+
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_FADD,
57+
[MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_FAND,
58+
[MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_FOR,
59+
[MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_FXOR,
60+
[MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_FSWAP,
61+
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FIMIN,
62+
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FIMAX,
63+
},
64+
[OPAL_FLOAT] = {
65+
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FFPADD_S,
66+
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FFPMIN_S,
67+
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FFPMAX_S,
68+
},
69+
[OPAL_DOUBLE] = {
70+
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FFPADD,
71+
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FFPMIN,
72+
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FFPMAX,
73+
},
2674
};
2775

2876
int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
@@ -32,7 +80,20 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
3280
{
3381
gni_mem_handle_t dummy = {0, 0};
3482
mca_btl_ugni_post_descriptor_t *post_desc;
35-
int rc;
83+
int gni_op, rc, type;
84+
size_t size;
85+
86+
size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8;
87+
if (MCA_BTL_ATOMIC_FLAG_FLOAT & flags) {
88+
type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_FLOAT : OPAL_DOUBLE;
89+
} else {
90+
type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_INT32 : OPAL_INT64;
91+
}
92+
93+
gni_op = amo_cmds[type][op];
94+
if (0 == gni_op) {
95+
return OPAL_ERR_NOT_SUPPORTED;
96+
}
3697

3798
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
3899
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
@@ -45,15 +106,19 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
45106
}
46107

47108
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, 0, dummy, remote_address,
48-
remote_handle->gni_handle, 8, 0);
49-
post_desc->desc.base.amo_cmd = amo_cmds[op];
109+
remote_handle->gni_handle, size, 0);
110+
post_desc->desc.base.amo_cmd = gni_op;
50111

51112
post_desc->desc.base.first_operand = operand;
52113

53114
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
54115
rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
55116
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
56117
if (GNI_RC_SUCCESS != rc) {
118+
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
119+
if (GNI_RC_ILLEGAL_OP == rc) {
120+
return OPAL_ERR_NOT_SUPPORTED;
121+
}
57122
return OPAL_ERR_OUT_OF_RESOURCE;
58123
}
59124

@@ -67,7 +132,20 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
67132
void *cbcontext, void *cbdata)
68133
{
69134
mca_btl_ugni_post_descriptor_t *post_desc;
70-
int rc;
135+
int gni_op, rc, type;
136+
size_t size;
137+
138+
size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8;
139+
if (MCA_BTL_ATOMIC_FLAG_FLOAT & flags) {
140+
type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_FLOAT : OPAL_DOUBLE;
141+
} else {
142+
type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_INT32 : OPAL_INT64;
143+
}
144+
145+
gni_op = famo_cmds[type][op];
146+
if (0 == gni_op) {
147+
return OPAL_ERR_NOT_SUPPORTED;
148+
}
71149

72150
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
73151
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
@@ -81,8 +159,8 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
81159

82160

83161
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
84-
remote_address, remote_handle->gni_handle, 8, 0);
85-
post_desc->desc.base.amo_cmd = famo_cmds[op];
162+
remote_address, remote_handle->gni_handle, size, 0);
163+
post_desc->desc.base.amo_cmd = gni_op;
86164

87165
post_desc->desc.base.first_operand = operand;
88166

@@ -91,6 +169,9 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
91169
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
92170
if (GNI_RC_SUCCESS != rc) {
93171
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
172+
if (GNI_RC_ILLEGAL_OP == rc) {
173+
return OPAL_ERR_NOT_SUPPORTED;
174+
}
94175
return OPAL_ERR_OUT_OF_RESOURCE;
95176
}
96177

@@ -103,7 +184,11 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_
103184
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
104185
{
105186
mca_btl_ugni_post_descriptor_t *post_desc;
106-
int rc;
187+
int gni_op, rc;
188+
size_t size;
189+
190+
gni_op = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? GNI_FMA_ATOMIC2_CSWAP_S : GNI_FMA_ATOMIC_CSWAP;
191+
size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8;
107192

108193
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
109194
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
@@ -117,8 +202,8 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_
117202

118203

119204
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
120-
remote_address, remote_handle->gni_handle, 8, 0);
121-
post_desc->desc.base.amo_cmd = GNI_FMA_ATOMIC_CSWAP;
205+
remote_address, remote_handle->gni_handle, size, 0);
206+
post_desc->desc.base.amo_cmd = gni_op;
122207

123208
post_desc->desc.base.first_operand = compare;
124209
post_desc->desc.base.second_operand = value;

opal/mca/btl/ugni/btl_ugni_component.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,13 @@ btl_ugni_component_register(void)
288288
MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR |
289289
MCA_BTL_ATOMIC_SUPPORTS_CSWAP;
290290

291+
if (GNI_DEVICE_ARIES == device_type) {
292+
/* aries supports additional atomic operations */
293+
mca_btl_ugni_module.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MIN | MCA_BTL_ATOMIC_SUPPORTS_MAX |
294+
MCA_BTL_ATOMIC_SUPPORTS_LAND | MCA_BTL_ATOMIC_SUPPORTS_LOR | MCA_BTL_ATOMIC_SUPPORTS_LXOR |
295+
MCA_BTL_ATOMIC_SUPPORTS_32BIT | MCA_BTL_ATOMIC_SUPPORTS_FLOAT;
296+
}
297+
291298
mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
292299

293300
mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */

0 commit comments

Comments
 (0)