Skip to content

Commit 7a7fd93

Browse files
committed
Pass group info in PMIx server callback
Ensures that any provided values can be found.
Signed-off-by: Ralph Castain <[email protected]>
1 parent 53a5759 commit 7a7fd93

File tree

1 file changed

+86
-37
lines changed

1 file changed

+86
-37
lines changed

src/mca/grpcomm/direct/grpcomm_direct_group.c

Lines changed: 86 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
99
* Copyright (c) 2014-2017 Research Organization for Information Science
1010
* and Technology (RIST). All rights reserved.
11-
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
11+
* Copyright (c) 2021-2026 Nanook Consulting All rights reserved.
1212
* $COPYRIGHT$
1313
*
1414
* Additional copyrights may follow
@@ -801,7 +801,7 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
801801
prte_grpcomm_direct_group_signature_t *sig = NULL;
802802
prte_pmix_grp_caddy_t cd2, *cd;
803803
int32_t cnt;
804-
pmix_status_t rc = PMIX_SUCCESS, st;
804+
pmix_status_t rc = PMIX_SUCCESS, st = PMIX_SUCCESS;
805805
pmix_proc_t *finalmembership = NULL;
806806
size_t nfinal = 0;
807807
size_t nendpts = 0;
@@ -811,7 +811,7 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
811811
pmix_info_t *grpinfo = NULL;
812812
pmix_info_t *endpts = NULL;
813813
prte_pmix_server_pset_t *pset;
814-
void *ilist;
814+
void *ilist, *nlist;
815815
PRTE_HIDE_UNUSED_PARAMS(status, sender, tag, cbdata);
816816

817817
PMIX_ACQUIRE_OBJECT(cd);
@@ -837,7 +837,6 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
837837
if (PMIX_SUCCESS != rc) {
838838
PMIX_ERROR_LOG(rc);
839839
st = rc;
840-
goto notify;
841840
}
842841

843842
/* if this was a destruct operation, then there is nothing
@@ -854,7 +853,7 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
854853
}
855854
if (NULL != coll && NULL != coll->cbfunc) {
856855
/* return to the local procs in the collective */
857-
coll->cbfunc(rc, NULL, 0, coll->cbdata, NULL, NULL);
856+
coll->cbfunc(st, NULL, 0, coll->cbdata, NULL, NULL);
858857
}
859858
// remove the tracker, if found
860859
find_delete_tracker(sig);
@@ -863,13 +862,28 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
863862
}
864863

865864
// must be a construct operation - continue unpacking
865+
if (PMIX_SUCCESS != st) {
866+
goto notify;
867+
}
868+
866869
ilist = PMIx_Info_list_start();
870+
nlist = PMIx_Info_list_start();
867871

868872
if (sig->ctxid_assigned) {
869873
PMIX_INFO_LIST_ADD(rc, ilist, PMIX_GROUP_CONTEXT_ID, &sig->ctxid, PMIX_SIZE);
870874
if (PMIX_SUCCESS != rc) {
871875
PMIX_ERROR_LOG(rc);
872876
st = rc;
877+
PMIX_INFO_LIST_RELEASE(ilist);
878+
PMIX_INFO_LIST_RELEASE(nlist);
879+
goto notify;
880+
}
881+
PMIX_INFO_LIST_ADD(rc, nlist, PMIX_GROUP_CONTEXT_ID, &sig->ctxid, PMIX_SIZE);
882+
if (PMIX_SUCCESS != rc) {
883+
PMIX_ERROR_LOG(rc);
884+
st = rc;
885+
PMIX_INFO_LIST_RELEASE(ilist);
886+
PMIX_INFO_LIST_RELEASE(nlist);
873887
goto notify;
874888
}
875889
}
@@ -880,6 +894,8 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
880894
if (PMIX_SUCCESS != rc) {
881895
PMIX_ERROR_LOG(rc);
882896
st = rc;
897+
PMIX_INFO_LIST_RELEASE(ilist);
898+
PMIX_INFO_LIST_RELEASE(nlist);
883899
goto notify;
884900
}
885901
if (0 < nfinal) {
@@ -889,6 +905,22 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
889905
if (PMIX_SUCCESS != rc) {
890906
PMIX_ERROR_LOG(rc);
891907
st = rc;
908+
PMIX_INFO_LIST_RELEASE(ilist);
909+
PMIX_INFO_LIST_RELEASE(nlist);
910+
goto notify;
911+
}
912+
// pass back the final group membership
913+
darray.type = PMIX_PROC;
914+
darray.array = finalmembership;
915+
darray.size = nfinal;
916+
// load the array - note: this copies the array!
917+
PMIX_INFO_LIST_ADD(rc, nlist, PMIX_GROUP_MEMBERSHIP, &darray, PMIX_DATA_ARRAY);
918+
PMIX_PROC_FREE(finalmembership, nfinal);
919+
if (PMIX_SUCCESS != rc) {
920+
PMIX_ERROR_LOG(rc);
921+
st = rc;
922+
PMIX_INFO_LIST_RELEASE(ilist);
923+
PMIX_INFO_LIST_RELEASE(nlist);
892924
goto notify;
893925
}
894926
}
@@ -899,6 +931,8 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
899931
if (PMIX_SUCCESS != rc) {
900932
PMIX_ERROR_LOG(rc);
901933
st = rc;
934+
PMIX_INFO_LIST_RELEASE(ilist);
935+
PMIX_INFO_LIST_RELEASE(nlist);
902936
goto notify;
903937
}
904938
if (0 < ngrpinfo) {
@@ -908,13 +942,30 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
908942
if (PMIX_SUCCESS != rc) {
909943
PMIX_ERROR_LOG(rc);
910944
st = rc;
945+
PMIX_INFO_LIST_RELEASE(ilist);
946+
PMIX_INFO_LIST_RELEASE(nlist);
947+
PMIX_INFO_FREE(grpinfo, ngrpinfo);
911948
goto notify;
912949
}
913-
// transfer them to our list
950+
// transfer them to both lists
914951
for (n=0; n < ngrpinfo; n++) {
915952
rc = PMIx_Info_list_add_value(ilist, PMIX_GROUP_INFO, &grpinfo[n].value);
916953
if (PMIX_SUCCESS != rc) {
917954
PMIX_ERROR_LOG(rc);
955+
st = rc;
956+
PMIX_INFO_LIST_RELEASE(ilist);
957+
PMIX_INFO_LIST_RELEASE(nlist);
958+
PMIX_INFO_FREE(grpinfo, ngrpinfo);
959+
goto notify;
960+
}
961+
rc = PMIx_Info_list_add_value(nlist, PMIX_GROUP_INFO, &grpinfo[n].value);
962+
if (PMIX_SUCCESS != rc) {
963+
PMIX_ERROR_LOG(rc);
964+
st = rc;
965+
PMIX_INFO_LIST_RELEASE(ilist);
966+
PMIX_INFO_LIST_RELEASE(nlist);
967+
PMIX_INFO_FREE(grpinfo, ngrpinfo);
968+
goto notify;
918969
}
919970
}
920971
PMIX_INFO_FREE(grpinfo, ngrpinfo);
@@ -927,6 +978,8 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
927978
if (PMIX_SUCCESS != rc) {
928979
PMIX_ERROR_LOG(rc);
929980
st = rc;
981+
PMIX_INFO_LIST_RELEASE(ilist);
982+
PMIX_INFO_LIST_RELEASE(nlist);
930983
goto notify;
931984
}
932985
if (0 < nendpts) {
@@ -936,13 +989,30 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
936989
if (PMIX_SUCCESS != rc) {
937990
PMIX_ERROR_LOG(rc);
938991
st = rc;
992+
PMIX_INFO_LIST_RELEASE(ilist);
993+
PMIX_INFO_LIST_RELEASE(nlist);
994+
PMIX_INFO_FREE(endpts, nendpts);
939995
goto notify;
940996
}
941-
// transfer them to our list
997+
// transfer them to both lists
942998
for (n=0; n < nendpts; n++) {
943999
rc = PMIx_Info_list_add_value(ilist, PMIX_GROUP_ENDPT_DATA, &endpts[n].value);
9441000
if (PMIX_SUCCESS != rc) {
9451001
PMIX_ERROR_LOG(rc);
1002+
st = rc;
1003+
PMIX_INFO_LIST_RELEASE(ilist);
1004+
PMIX_INFO_LIST_RELEASE(nlist);
1005+
PMIX_INFO_FREE(endpts, nendpts);
1006+
goto notify;
1007+
}
1008+
rc = PMIx_Info_list_add_value(nlist, PMIX_GROUP_ENDPT_DATA, &endpts[n].value);
1009+
if (PMIX_SUCCESS != rc) {
1010+
PMIX_ERROR_LOG(rc);
1011+
st = rc;
1012+
PMIX_INFO_LIST_RELEASE(ilist);
1013+
PMIX_INFO_LIST_RELEASE(nlist);
1014+
PMIX_INFO_FREE(endpts, nendpts);
1015+
goto notify;
9461016
}
9471017
}
9481018
PMIX_INFO_FREE(endpts, nendpts);
@@ -987,44 +1057,23 @@ void prte_grpcomm_direct_grp_release(int status, pmix_proc_t *sender,
9871057
if (NULL != coll && NULL != coll->cbfunc) {
9881058
// service the procs that are part of the collective
9891059

990-
PMIX_INFO_LIST_START(ilist);
991-
if (NULL != finalmembership) {
992-
// pass back the final group membership
993-
darray.type = PMIX_PROC;
994-
darray.array = finalmembership;
995-
darray.size = nfinal;
996-
// load the array - note: this copies the array!
997-
PMIX_INFO_LIST_ADD(rc, ilist, PMIX_GROUP_MEMBERSHIP, &darray, PMIX_DATA_ARRAY);
998-
if (PMIX_SUCCESS != rc) {
999-
PMIX_ERROR_LOG(rc);
1000-
}
1001-
}
1002-
1003-
if (sig->ctxid_assigned) {
1004-
PMIX_INFO_LIST_ADD(rc, ilist, PMIX_GROUP_CONTEXT_ID, &sig->ctxid, PMIX_SIZE);
1005-
if (PMIX_SUCCESS != rc) {
1060+
// convert for returning to PMIx server library
1061+
cd = PMIX_NEW(prte_pmix_grp_caddy_t);
1062+
if (PMIX_SUCCESS == st) {
1063+
PMIX_INFO_LIST_CONVERT(rc, nlist, &darray);
1064+
if (PMIX_SUCCESS != rc && PMIX_ERR_EMPTY != rc) {
10061065
PMIX_ERROR_LOG(rc);
10071066
}
1067+
cd->info = (pmix_info_t*)darray.array;
1068+
cd->ninfo = darray.size;
1069+
PMIX_INFO_LIST_RELEASE(nlist);
10081070
}
10091071

1010-
// convert for returning to PMIx server library
1011-
PMIX_INFO_LIST_CONVERT(rc, ilist, &darray);
1012-
if (PMIX_SUCCESS != rc && PMIX_ERR_EMPTY != rc) {
1013-
PMIX_ERROR_LOG(rc);
1014-
}
1015-
cd = PMIX_NEW(prte_pmix_grp_caddy_t);
1016-
cd->info = (pmix_info_t*)darray.array;
1017-
cd->ninfo = darray.size;
1018-
PMIX_INFO_LIST_RELEASE(ilist);
1019-
10201072
/* return to the PMIx server library for relay to
10211073
* local procs in the operation */
1022-
coll->cbfunc(rc, cd->info, cd->ninfo, coll->cbdata, relcb, (void*)cd);
1074+
coll->cbfunc(st, cd->info, cd->ninfo, coll->cbdata, relcb, (void*)cd);
10231075
}
10241076

1025-
if (NULL != finalmembership) {
1026-
PMIX_PROC_FREE(finalmembership, nfinal);
1027-
}
10281077
if (0 < nendpts) {
10291078
PMIX_INFO_FREE(endpts, nendpts);
10301079
}

0 commit comments

Comments
 (0)