Skip to content

Commit a4abcdc

Browse files
committed
Pass group info in PMIx server callback
Ensures that any provided values can be found.

Signed-off-by: Ralph Castain <[email protected]>
1 parent ac36457 commit a4abcdc

File tree

1 file changed

+76
-38
lines changed

1 file changed

+76
-38
lines changed

src/mca/grpcomm/direct/grpcomm_direct_group.c

Lines changed: 76 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
99
* Copyright (c) 2014-2017 Research Organization for Information Science
1010
* and Technology (RIST). All rights reserved.
11-
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
11+
* Copyright (c) 2021-2026 Nanook Consulting All rights reserved.
1212
* $COPYRIGHT$
1313
*
1414
* Additional copyrights may follow
@@ -801,7 +801,7 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
801801
prte_grpcomm_direct_group_signature_t *sig = NULL;
802802
prte_pmix_grp_caddy_t cd2, *cd;
803803
int32_t cnt;
804-
pmix_status_t rc = PMIX_SUCCESS, st;
804+
pmix_status_t rc = PMIX_SUCCESS, st = PMIX_SUCCESS;
805805
pmix_proc_t *finalmembership = NULL;
806806
size_t nfinal = 0;
807807
size_t nendpts = 0;
@@ -811,7 +811,7 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
811811
pmix_info_t *grpinfo = NULL;
812812
pmix_info_t *endpts = NULL;
813813
prte_pmix_server_pset_t *pset;
814-
void *ilist;
814+
void *ilist, *nlist;
815815
PRTE_HIDE_UNUSED_PARAMS(status, sender, tag, cbdata);
816816

817817
PMIX_ACQUIRE_OBJECT(cd);
@@ -837,7 +837,6 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
837837
if (PMIX_SUCCESS != rc) {
838838
PMIX_ERROR_LOG(rc);
839839
st = rc;
840-
goto notify;
841840
}
842841

843842
/* if this was a destruct operation, then there is nothing
@@ -854,22 +853,37 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
854853
}
855854
if (NULL != coll && NULL != coll->cbfunc) {
856855
/* return to the local procs in the collective */
857-
coll->cbfunc(rc, NULL, 0, coll->cbdata, NULL, NULL);
856+
coll->cbfunc(st, NULL, 0, coll->cbdata, NULL, NULL);
858857
}
859858
// remove the tracker, if found
860859
find_delete_tracker(sig);
861860
PMIX_RELEASE(sig);
862861
return;
863862
}
864863

865-
// must be a construct operation - continue unpacking
864+
// setup to cache info
866865
ilist = PMIx_Info_list_start();
866+
nlist = PMIx_Info_list_start();
867+
868+
// must be a construct operation - continue unpacking
869+
if (PMIX_SUCCESS != st) {
870+
PMIX_INFO_LIST_RELEASE(ilist);
871+
goto notify;
872+
}
867873

868874
if (sig->ctxid_assigned) {
869875
PMIX_INFO_LIST_ADD(rc, ilist, PMIX_GROUP_CONTEXT_ID, &sig->ctxid, PMIX_SIZE);
870876
if (PMIX_SUCCESS != rc) {
871877
PMIX_ERROR_LOG(rc);
872878
st = rc;
879+
PMIX_INFO_LIST_RELEASE(ilist);
880+
goto notify;
881+
}
882+
PMIX_INFO_LIST_ADD(rc, nlist, PMIX_GROUP_CONTEXT_ID, &sig->ctxid, PMIX_SIZE);
883+
if (PMIX_SUCCESS != rc) {
884+
PMIX_ERROR_LOG(rc);
885+
st = rc;
886+
PMIX_INFO_LIST_RELEASE(ilist);
873887
goto notify;
874888
}
875889
}
@@ -880,6 +894,7 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
880894
if (PMIX_SUCCESS != rc) {
881895
PMIX_ERROR_LOG(rc);
882896
st = rc;
897+
PMIX_INFO_LIST_RELEASE(ilist);
883898
goto notify;
884899
}
885900
if (0 < nfinal) {
@@ -889,6 +904,20 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
889904
if (PMIX_SUCCESS != rc) {
890905
PMIX_ERROR_LOG(rc);
891906
st = rc;
907+
PMIX_INFO_LIST_RELEASE(ilist);
908+
goto notify;
909+
}
910+
// pass back the final group membership
911+
darray.type = PMIX_PROC;
912+
darray.array = finalmembership;
913+
darray.size = nfinal;
914+
// load the array - note: this copies the array!
915+
PMIX_INFO_LIST_ADD(rc, nlist, PMIX_GROUP_MEMBERSHIP, &darray, PMIX_DATA_ARRAY);
916+
PMIX_PROC_FREE(finalmembership, nfinal);
917+
if (PMIX_SUCCESS != rc) {
918+
PMIX_ERROR_LOG(rc);
919+
st = rc;
920+
PMIX_INFO_LIST_RELEASE(ilist);
892921
goto notify;
893922
}
894923
}
@@ -899,6 +928,7 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
899928
if (PMIX_SUCCESS != rc) {
900929
PMIX_ERROR_LOG(rc);
901930
st = rc;
931+
PMIX_INFO_LIST_RELEASE(ilist);
902932
goto notify;
903933
}
904934
if (0 < ngrpinfo) {
@@ -908,13 +938,27 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
908938
if (PMIX_SUCCESS != rc) {
909939
PMIX_ERROR_LOG(rc);
910940
st = rc;
941+
PMIX_INFO_LIST_RELEASE(ilist);
942+
PMIX_INFO_FREE(grpinfo, ngrpinfo);
911943
goto notify;
912944
}
913-
// transfer them to our list
945+
// transfer them to both lists
914946
for (n=0; n < ngrpinfo; n++) {
915947
rc = PMIx_Info_list_add_value(ilist, PMIX_GROUP_INFO, &grpinfo[n].value);
916948
if (PMIX_SUCCESS != rc) {
917949
PMIX_ERROR_LOG(rc);
950+
st = rc;
951+
PMIX_INFO_LIST_RELEASE(ilist);
952+
PMIX_INFO_FREE(grpinfo, ngrpinfo);
953+
goto notify;
954+
}
955+
rc = PMIx_Info_list_add_value(nlist, PMIX_GROUP_INFO, &grpinfo[n].value);
956+
if (PMIX_SUCCESS != rc) {
957+
PMIX_ERROR_LOG(rc);
958+
st = rc;
959+
PMIX_INFO_LIST_RELEASE(ilist);
960+
PMIX_INFO_FREE(grpinfo, ngrpinfo);
961+
goto notify;
918962
}
919963
}
920964
PMIX_INFO_FREE(grpinfo, ngrpinfo);
@@ -927,6 +971,7 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
927971
if (PMIX_SUCCESS != rc) {
928972
PMIX_ERROR_LOG(rc);
929973
st = rc;
974+
PMIX_INFO_LIST_RELEASE(ilist);
930975
goto notify;
931976
}
932977
if (0 < nendpts) {
@@ -936,13 +981,27 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
936981
if (PMIX_SUCCESS != rc) {
937982
PMIX_ERROR_LOG(rc);
938983
st = rc;
984+
PMIX_INFO_LIST_RELEASE(ilist);
985+
PMIX_INFO_FREE(endpts, nendpts);
939986
goto notify;
940987
}
941-
// transfer them to our list
988+
// transfer them to both lists
942989
for (n=0; n < nendpts; n++) {
943990
rc = PMIx_Info_list_add_value(ilist, PMIX_GROUP_ENDPT_DATA, &endpts[n].value);
944991
if (PMIX_SUCCESS != rc) {
945992
PMIX_ERROR_LOG(rc);
993+
st = rc;
994+
PMIX_INFO_LIST_RELEASE(ilist);
995+
PMIX_INFO_FREE(endpts, nendpts);
996+
goto notify;
997+
}
998+
rc = PMIx_Info_list_add_value(nlist, PMIX_GROUP_ENDPT_DATA, &endpts[n].value);
999+
if (PMIX_SUCCESS != rc) {
1000+
PMIX_ERROR_LOG(rc);
1001+
st = rc;
1002+
PMIX_INFO_LIST_RELEASE(ilist);
1003+
PMIX_INFO_FREE(endpts, nendpts);
1004+
goto notify;
9461005
}
9471006
}
9481007
PMIX_INFO_FREE(endpts, nendpts);
@@ -987,50 +1046,29 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
9871046
if (NULL != coll && NULL != coll->cbfunc) {
9881047
// service the procs that are part of the collective
9891048

990-
PMIX_INFO_LIST_START(ilist);
991-
if (NULL != finalmembership) {
992-
// pass back the final group membership
993-
darray.type = PMIX_PROC;
994-
darray.array = finalmembership;
995-
darray.size = nfinal;
996-
// load the array - note: this copies the array!
997-
PMIX_INFO_LIST_ADD(rc, ilist, PMIX_GROUP_MEMBERSHIP, &darray, PMIX_DATA_ARRAY);
998-
if (PMIX_SUCCESS != rc) {
999-
PMIX_ERROR_LOG(rc);
1000-
}
1001-
}
1002-
1003-
if (sig->ctxid_assigned) {
1004-
PMIX_INFO_LIST_ADD(rc, ilist, PMIX_GROUP_CONTEXT_ID, &sig->ctxid, PMIX_SIZE);
1005-
if (PMIX_SUCCESS != rc) {
1049+
// convert for returning to PMIx server library
1050+
cd = PMIX_NEW(prte_pmix_grp_caddy_t);
1051+
if (PMIX_SUCCESS == st) {
1052+
PMIX_INFO_LIST_CONVERT(rc, nlist, &darray);
1053+
if (PMIX_SUCCESS != rc && PMIX_ERR_EMPTY != rc) {
10061054
PMIX_ERROR_LOG(rc);
10071055
}
1056+
cd->info = (pmix_info_t*)darray.array;
1057+
cd->ninfo = darray.size;
10081058
}
10091059

1010-
// convert for returning to PMIx server library
1011-
PMIX_INFO_LIST_CONVERT(rc, ilist, &darray);
1012-
if (PMIX_SUCCESS != rc && PMIX_ERR_EMPTY != rc) {
1013-
PMIX_ERROR_LOG(rc);
1014-
}
1015-
cd = PMIX_NEW(prte_pmix_grp_caddy_t);
1016-
cd->info = (pmix_info_t*)darray.array;
1017-
cd->ninfo = darray.size;
1018-
PMIX_INFO_LIST_RELEASE(ilist);
1019-
10201060
/* return to the PMIx server library for relay to
10211061
* local procs in the operation */
1022-
coll->cbfunc(rc, cd->info, cd->ninfo, coll->cbdata, relcb, (void*)cd);
1062+
coll->cbfunc(st, cd->info, cd->ninfo, coll->cbdata, relcb, (void*)cd);
10231063
}
10241064

1025-
if (NULL != finalmembership) {
1026-
PMIX_PROC_FREE(finalmembership, nfinal);
1027-
}
10281065
if (0 < nendpts) {
10291066
PMIX_INFO_FREE(endpts, nendpts);
10301067
}
10311068
if (0 < ngrpinfo) {
10321069
PMIX_INFO_FREE(grpinfo, ngrpinfo);
10331070
}
1071+
PMIX_INFO_LIST_RELEASE(nlist);
10341072
// remove this collective from our tracker
10351073
find_delete_tracker(sig);
10361074
PMIX_RELEASE(sig);

0 commit comments

Comments
 (0)