Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit 4d754bf

Browse files
committed
opal/pmix: add blocking Fence to SLURM components.
A blocking fence is used in yalla del proc. Native pmix already exposes this functionality; we need to expose it for SLURM's s1/s2 components as well. This commit also fixes the uninitialized `rc` in the `fencenb` function of both components. (cherry picked from commit open-mpi/ompi@72585a9)
1 parent bb1c4f3 commit 4d754bf

File tree

2 files changed

+64
-2
lines changed

2 files changed

+64
-2
lines changed

opal/mca/pmix/s1/pmix_s1.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ static int s1_abort(int flag, const char msg[],
3838
static int s1_commit(void);
3939
static int s1_fencenb(opal_list_t *procs, int collect_data,
4040
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
41+
static int s1_fence(opal_list_t *procs, int collect_data);
4142
static int s1_put(opal_pmix_scope_t scope,
4243
opal_value_t *kv);
4344
static int s1_get(const opal_process_name_t *id,
@@ -61,6 +62,7 @@ const opal_pmix_base_module_t opal_pmix_s1_module = {
6162
.abort = s1_abort,
6263
.commit = s1_commit,
6364
.fence_nb = s1_fencenb,
65+
.fence = s1_fence,
6466
.put = s1_put,
6567
.get = s1_get,
6668
.publish = s1_publish,
@@ -515,7 +517,7 @@ static int s1_commit(void)
515517
static void fencenb(int sd, short args, void *cbdata)
516518
{
517519
pmi_opcaddy_t *op = (pmi_opcaddy_t*)cbdata;
518-
int rc;
520+
int rc = OPAL_SUCCESS;
519521
int32_t i;
520522
opal_value_t *kp, kvn;
521523
opal_hwloc_locality_t locality;
@@ -605,6 +607,35 @@ static int s1_fencenb(opal_list_t *procs, int collect_data,
605607
return OPAL_SUCCESS;
606608
}
607609

610+
/*
 * Poll until the expression (a) evaluates to false, calling usleep(10)
 * between checks.  The expression is re-evaluated on every pass, so it
 * must read something that can change while this loop is sleeping.
 */
#define S1_WAIT_FOR_COMPLETION(a)    \
    do {                             \
        for ( ; (a); ) {             \
            usleep(10);              \
        }                            \
    } while (0)
616+
617+
/*
 * Completion record shared between the blocking fence and the callback
 * it passes to the non-blocking fence.  Field order matters: the record
 * is set up with a positional initializer.
 */
struct fence_result {
    /* non-zero while the fence is outstanding; the callback resets it to 0 */
    volatile int flag;
    /* status code handed to the completion callback */
    int status;
};
621+
622+
static void fence_release(int status, void *cbdata)
623+
{
624+
struct fence_result *res = (struct fence_result*)cbdata;
625+
res->status = status;
626+
opal_atomic_wmb();
627+
res->flag = 0;
628+
}
629+
630+
/*
 * Blocking fence built on top of the non-blocking one.
 *
 * Starts s1_fencenb() with fence_release() as its completion callback
 * and polls until the callback clears the flag in the local result
 * record.
 *
 * procs, collect_data - forwarded unchanged to s1_fencenb()
 *
 * Returns the status delivered to the callback, or the error code from
 * s1_fencenb() when the operation could not be started.
 */
static int s1_fence(opal_list_t *procs, int collect_data)
{
    /* flag starts at 1 (outstanding), status at OPAL_SUCCESS */
    struct fence_result result = { 1, OPAL_SUCCESS };
    int rc;

    rc = s1_fencenb(procs, collect_data, fence_release, (void*)&result);
    if (OPAL_SUCCESS != rc) {
        /* NOTE(review): assumes a failed s1_fencenb() never invokes the
         * callback; waiting for the flag would otherwise hang forever. */
        return rc;
    }

    S1_WAIT_FOR_COMPLETION(result.flag);

    /* Pairs with the opal_atomic_wmb() in fence_release(): do not read
     * the status until the cleared flag has been observed. */
    opal_atomic_rmb();
    return result.status;
}
637+
638+
608639
static int s1_get(const opal_process_name_t *id,
609640
const char *key, opal_list_t *info,
610641
opal_value_t **kv)

opal/mca/pmix/s2/pmix_s2.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ static int s2_abort(int flag, const char msg[],
4545
static int s2_commit(void);
4646
static int s2_fencenb(opal_list_t *procs, int collect_data,
4747
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
48+
static int s2_fence(opal_list_t *procs, int collect_data);
4849
static int s2_put(opal_pmix_scope_t scope,
4950
opal_value_t *kv);
5051
static int s2_get(const opal_process_name_t *id,
@@ -68,6 +69,7 @@ const opal_pmix_base_module_t opal_pmix_s2_module = {
6869
.abort = s2_abort,
6970
.commit = s2_commit,
7071
.fence_nb = s2_fencenb,
72+
.fence = s2_fence,
7173
.put = s2_put,
7274
.get = s2_get,
7375
.publish = s2_publish,
@@ -534,7 +536,7 @@ static int s2_commit(void)
534536
static void fencenb(int sd, short args, void *cbdata)
535537
{
536538
pmi_opcaddy_t *op = (pmi_opcaddy_t*)cbdata;
537-
int rc;
539+
int rc = OPAL_SUCCESS;
538540
int32_t i;
539541
opal_value_t *kp, kvn;
540542
opal_hwloc_locality_t locality;
@@ -624,6 +626,35 @@ static int s2_fencenb(opal_list_t *procs, int collect_data,
624626
return OPAL_SUCCESS;
625627
}
626628

629+
/*
 * Poll until the expression (a) evaluates to false, calling usleep(10)
 * between checks.  The expression is re-evaluated on every pass, so it
 * must read something that can change while this loop is sleeping.
 */
#define S2_WAIT_FOR_COMPLETION(a)    \
    do {                             \
        for ( ; (a); ) {             \
            usleep(10);              \
        }                            \
    } while (0)
635+
636+
/*
 * Completion record shared between the blocking fence and the callback
 * it passes to the non-blocking fence.  Field order matters: the record
 * is set up with a positional initializer.
 */
struct fence_result {
    /* non-zero while the fence is outstanding; the callback resets it to 0 */
    volatile int flag;
    /* status code handed to the completion callback */
    int status;
};
640+
641+
static void fence_release(int status, void *cbdata)
642+
{
643+
struct fence_result *res = (struct fence_result*)cbdata;
644+
res->status = status;
645+
opal_atomic_wmb();
646+
res->flag = 0;
647+
}
648+
649+
/*
 * Blocking fence built on top of the non-blocking one.
 *
 * Starts s2_fencenb() with fence_release() as its completion callback
 * and polls until the callback clears the flag in the local result
 * record.
 *
 * procs, collect_data - forwarded unchanged to s2_fencenb()
 *
 * Returns the status delivered to the callback, or the error code from
 * s2_fencenb() when the operation could not be started.
 */
static int s2_fence(opal_list_t *procs, int collect_data)
{
    /* flag starts at 1 (outstanding), status at OPAL_SUCCESS */
    struct fence_result result = { 1, OPAL_SUCCESS };
    int rc;

    rc = s2_fencenb(procs, collect_data, fence_release, (void*)&result);
    if (OPAL_SUCCESS != rc) {
        /* NOTE(review): assumes a failed s2_fencenb() never invokes the
         * callback; waiting for the flag would otherwise hang forever. */
        return rc;
    }

    S2_WAIT_FOR_COMPLETION(result.flag);

    /* Pairs with the opal_atomic_wmb() in fence_release(): do not read
     * the status until the cleared flag has been observed. */
    opal_atomic_rmb();
    return result.status;
}
656+
657+
627658
static int s2_get(const opal_process_name_t *id,
628659
const char *key, opal_list_t *info,
629660
opal_value_t **kv)

0 commit comments

Comments
 (0)