Skip to content

Commit c2fa86d

Browse files
author
rhc54
committed
Merge pull request open-mpi#822 from hjelmn/1.10_osc_sm_fix
1.10 osc sm fixes
2 parents 562cddb + 8f19c79 commit c2fa86d

File tree

3 files changed

+254
-108
lines changed

3 files changed

+254
-108
lines changed

ompi/mca/osc/sm/osc_sm.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ struct ompi_osc_sm_lock_t {
3939
typedef struct ompi_osc_sm_lock_t ompi_osc_sm_lock_t;
4040

4141
struct ompi_osc_sm_node_state_t {
42-
int32_t post_count;
4342
int32_t complete_count;
4443
ompi_osc_sm_lock_t lock;
4544
opal_atomic_lock_t accumulate_lock;
@@ -84,6 +83,9 @@ struct ompi_osc_sm_module_t {
8483
ompi_osc_sm_global_state_t *global_state;
8584
ompi_osc_sm_node_state_t *my_node_state;
8685
ompi_osc_sm_node_state_t *node_states;
86+
uint64_t **posts;
87+
88+
opal_mutex_t lock;
8789
};
8890
typedef struct ompi_osc_sm_module_t ompi_osc_sm_module_t;
8991

ompi/mca/osc/sm/osc_sm_active_target.c

Lines changed: 179 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
33
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
4-
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
4+
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
55
* reserved.
66
* $COPYRIGHT$
77
*
@@ -19,6 +19,74 @@
1919

2020
#include "osc_sm.h"
2121

22+
/**
 * compare_ranks:
 *
 * @param[in] ptra Pointer to integer item
 * @param[in] ptrb Pointer to integer item
 *
 * @returns 0 if *ptra == *ptrb
 * @returns -1 if *ptra < *ptrb
 * @returns 1 otherwise
 *
 * qsort()-compatible comparator used to sort the rank list in ascending
 * order. It can be removed if groups are always in order.
 */
static int compare_ranks (const void *ptra, const void *ptrb)
{
    /* read through const-qualified pointers; the comparator never writes
     * to the elements, so the const on the arguments must not be dropped */
    const int a = *((const int *) ptra);
    const int b = *((const int *) ptrb);

    /* explicit comparisons rather than (a - b), which could overflow */
    if (a < b) {
        return -1;
    }

    if (a > b) {
        return 1;
    }

    return 0;
}
48+
49+
/**
50+
* ompi_osc_pt2pt_get_comm_ranks:
51+
*
52+
* @param[in] module - OSC PT2PT module
53+
* @param[in] sub_group - Group with ranks to translate
54+
*
55+
* @returns an array of translated ranks on success or NULL on failure
56+
*
57+
* Translate the ranks given in {sub_group} into ranks in the
58+
* communicator used to create {module}.
59+
*/
60+
static int *ompi_osc_sm_group_ranks (ompi_group_t *group, ompi_group_t *sub_group)
61+
{
62+
int size = ompi_group_size(sub_group);
63+
int *ranks1, *ranks2;
64+
int ret;
65+
66+
ranks1 = calloc (size, sizeof(int));
67+
ranks2 = calloc (size, sizeof(int));
68+
if (NULL == ranks1 || NULL == ranks2) {
69+
free (ranks1);
70+
free (ranks2);
71+
return NULL;
72+
}
73+
74+
for (int i = 0 ; i < size ; ++i) {
75+
ranks1[i] = i;
76+
}
77+
78+
ret = ompi_group_translate_ranks (sub_group, size, ranks1, group, ranks2);
79+
free (ranks1);
80+
if (OMPI_SUCCESS != ret) {
81+
free (ranks2);
82+
return NULL;
83+
}
84+
85+
qsort (ranks2, size, sizeof (int), compare_ranks);
86+
87+
return ranks2;
88+
}
89+
2290

2391
int
2492
ompi_osc_sm_fence(int assert, struct ompi_win_t *win)
@@ -51,28 +119,50 @@ ompi_osc_sm_fence(int assert, struct ompi_win_t *win)
51119
}
52120
}
53121

54-
55122
int
56123
ompi_osc_sm_start(struct ompi_group_t *group,
57124
int assert,
58125
struct ompi_win_t *win)
59126
{
60127
ompi_osc_sm_module_t *module =
61128
(ompi_osc_sm_module_t*) win->w_osc_module;
129+
int my_rank = ompi_comm_rank (module->comm);
130+
131+
OBJ_RETAIN(group);
132+
133+
if (!OPAL_ATOMIC_CMPSET(&module->start_group, NULL, group)) {
134+
OBJ_RELEASE(group);
135+
return OMPI_ERR_RMA_SYNC;
136+
}
62137

63138
if (0 == (assert & MPI_MODE_NOCHECK)) {
64139
int size;
65140

66-
OBJ_RETAIN(group);
67-
module->start_group = group;
141+
int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
142+
if (NULL == ranks) {
143+
return OMPI_ERR_OUT_OF_RESOURCE;
144+
}
145+
68146
size = ompi_group_size(module->start_group);
69147

70-
while (module->my_node_state->post_count != size) {
71-
opal_progress();
72-
opal_atomic_mb();
73-
}
74-
} else {
75-
module->start_group = NULL;
148+
for (int i = 0 ; i < size ; ++i) {
149+
int rank_byte = ranks[i] >> 6;
150+
uint64_t old, rank_bit = ((uint64_t) 1) << (ranks[i] & 0x3f);
151+
152+
/* wait for rank to post */
153+
while (!(module->posts[my_rank][rank_byte] & rank_bit)) {
154+
opal_progress();
155+
opal_atomic_mb();
156+
}
157+
158+
opal_atomic_rmb ();
159+
160+
do {
161+
old = module->posts[my_rank][rank_byte];
162+
} while (!opal_atomic_cmpset_64 ((int64_t *) module->posts[my_rank] + rank_byte, old, old ^ rank_bit));
163+
}
164+
165+
free (ranks);
76166
}
77167

78168
opal_atomic_mb();
@@ -85,30 +175,33 @@ ompi_osc_sm_complete(struct ompi_win_t *win)
85175
{
86176
ompi_osc_sm_module_t *module =
87177
(ompi_osc_sm_module_t*) win->w_osc_module;
88-
int gsize, csize;
178+
ompi_group_t *group;
179+
int gsize;
89180

90181
/* ensure all memory operations have completed */
91182
opal_atomic_mb();
92183

93-
if (NULL != module->start_group) {
94-
module->my_node_state->post_count = 0;
95-
opal_atomic_mb();
184+
group = module->start_group;
185+
if (NULL == group || !OPAL_ATOMIC_CMPSET(&module->start_group, group, NULL)) {
186+
return OMPI_ERR_RMA_SYNC;
187+
}
96188

97-
gsize = ompi_group_size(module->start_group);
98-
csize = ompi_comm_size(module->comm);
99-
for (int i = 0 ; i < gsize ; ++i) {
100-
for (int j = 0 ; j < csize ; ++j) {
101-
if (ompi_group_peer_lookup(module->start_group, i) ==
102-
ompi_comm_peer_lookup(module->comm, j)) {
103-
opal_atomic_add_32(&module->node_states[j].complete_count, 1);
104-
}
105-
}
106-
}
189+
opal_atomic_mb();
107190

108-
OBJ_RELEASE(module->start_group);
109-
module->start_group = NULL;
191+
int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
192+
if (NULL == ranks) {
193+
return OMPI_ERR_OUT_OF_RESOURCE;
110194
}
111195

196+
gsize = ompi_group_size(group);
197+
for (int i = 0 ; i < gsize ; ++i) {
198+
(void) opal_atomic_add_32(&module->node_states[ranks[i]].complete_count, 1);
199+
}
200+
201+
free (ranks);
202+
203+
OBJ_RELEASE(group);
204+
112205
opal_atomic_mb();
113206
return OMPI_SUCCESS;
114207
}
@@ -121,29 +214,45 @@ ompi_osc_sm_post(struct ompi_group_t *group,
121214
{
122215
ompi_osc_sm_module_t *module =
123216
(ompi_osc_sm_module_t*) win->w_osc_module;
124-
int gsize, csize;
217+
int my_rank = ompi_comm_rank (module->comm);
218+
int my_byte = my_rank >> 6;
219+
uint64_t my_bit = ((uint64_t) 1) << (my_rank & 0x3f);
220+
int gsize;
221+
222+
OPAL_THREAD_LOCK(&module->lock);
223+
224+
if (NULL != module->post_group) {
225+
OPAL_THREAD_UNLOCK(&module->lock);
226+
return OMPI_ERR_RMA_SYNC;
227+
}
228+
229+
module->post_group = group;
230+
231+
OBJ_RETAIN(group);
125232

126233
if (0 == (assert & MPI_MODE_NOCHECK)) {
127-
OBJ_RETAIN(group);
128-
module->post_group = group;
234+
int *ranks = ompi_osc_sm_group_ranks (module->comm->c_local_group, group);
235+
if (NULL == ranks) {
236+
return OMPI_ERR_OUT_OF_RESOURCE;
237+
}
129238

130239
module->my_node_state->complete_count = 0;
131240
opal_atomic_mb();
132241

133242
gsize = ompi_group_size(module->post_group);
134-
csize = ompi_comm_size(module->comm);
135243
for (int i = 0 ; i < gsize ; ++i) {
136-
for (int j = 0 ; j < csize ; ++j) {
137-
if (ompi_group_peer_lookup(module->post_group, i) ==
138-
ompi_comm_peer_lookup(module->comm, j)) {
139-
opal_atomic_add_32(&module->node_states[j].post_count, 1);
140-
}
141-
}
244+
(void) opal_atomic_add_64 ((int64_t *) module->posts[ranks[i]] + my_byte, my_bit);
142245
}
143-
} else {
144-
module->post_group = NULL;
246+
247+
opal_atomic_wmb ();
248+
249+
free (ranks);
250+
251+
opal_progress ();
145252
}
146253

254+
OPAL_THREAD_UNLOCK(&module->lock);
255+
147256
return OMPI_SUCCESS;
148257
}
149258

@@ -153,19 +262,29 @@ ompi_osc_sm_wait(struct ompi_win_t *win)
153262
{
154263
ompi_osc_sm_module_t *module =
155264
(ompi_osc_sm_module_t*) win->w_osc_module;
265+
ompi_group_t *group;
156266

157-
if (NULL != module->post_group) {
158-
int size = ompi_group_size(module->post_group);
267+
OPAL_THREAD_LOCK(&module->lock);
159268

160-
while (module->my_node_state->complete_count != size) {
161-
opal_progress();
162-
opal_atomic_mb();
163-
}
269+
if (NULL == module->post_group) {
270+
OPAL_THREAD_UNLOCK(&module->lock);
271+
return OMPI_ERR_RMA_SYNC;
272+
}
164273

165-
OBJ_RELEASE(module->post_group);
166-
module->post_group = NULL;
274+
group = module->post_group;
275+
276+
int size = ompi_group_size (group);
277+
278+
while (module->my_node_state->complete_count != size) {
279+
opal_progress();
280+
opal_atomic_mb();
167281
}
168282

283+
OBJ_RELEASE(group);
284+
module->post_group = NULL;
285+
286+
OPAL_THREAD_UNLOCK(&module->lock);
287+
169288
/* ensure all memory operations have completed */
170289
opal_atomic_mb();
171290

@@ -180,19 +299,25 @@ ompi_osc_sm_test(struct ompi_win_t *win,
180299
ompi_osc_sm_module_t *module =
181300
(ompi_osc_sm_module_t*) win->w_osc_module;
182301

183-
if (NULL != module->post_group) {
184-
int size = ompi_group_size(module->post_group);
302+
OPAL_THREAD_LOCK(&module->lock);
185303

186-
if (module->my_node_state->complete_count == size) {
187-
OBJ_RELEASE(module->post_group);
188-
module->post_group = NULL;
189-
*flag = 1;
190-
}
304+
if (NULL == module->post_group) {
305+
OPAL_THREAD_UNLOCK(&module->lock);
306+
return OMPI_ERR_RMA_SYNC;
307+
}
308+
309+
int size = ompi_group_size(module->post_group);
310+
311+
if (module->my_node_state->complete_count == size) {
312+
OBJ_RELEASE(module->post_group);
313+
module->post_group = NULL;
314+
*flag = 1;
191315
} else {
192-
opal_atomic_mb();
193316
*flag = 0;
194317
}
195318

319+
OPAL_THREAD_UNLOCK(&module->lock);
320+
196321
/* ensure all memory operations have completed */
197322
opal_atomic_mb();
198323

0 commit comments

Comments
 (0)