Skip to content

Commit 4a97086

Browse files
author
Ralph Castain
committed
Fix intercomm_create by ensuring that both sides know how to translate jobid to/from nspace
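Have the cray, s1, and s2 PMIx components return a placeholder nspace ("N/A") instead of NULL from get_nspace, so that ompi_proc_pack always has a string to pack (cherry picked from commit open-mpi/ompi@bfdf08a)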
1 parent 61b4e3a commit 4a97086

File tree

4 files changed

+26
-3
lines changed

4 files changed

+26
-3
lines changed

ompi/proc/proc.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,7 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize,
613613
opal_buffer_t* buf)
614614
{
615615
int rc;
616+
char *nspace;
616617

617618
OPAL_THREAD_LOCK(&ompi_proc_lock);
618619

@@ -629,18 +630,30 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize,
629630
* can be sent.
630631
*/
631632
for (int i = 0 ; i < proclistsize ; ++i) {
633+
/* send proc name */
632634
rc = opal_dss.pack(buf, &(proclist[i]->super.proc_name), 1, OMPI_NAME);
633635
if(rc != OPAL_SUCCESS) {
634636
OMPI_ERROR_LOG(rc);
635637
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
636638
return rc;
637639
}
640+
/* retrieve and send the corresponding nspace for this job
641+
* as the remote side may not know the translation */
642+
nspace = (char*)opal_pmix.get_nspace(proclist[i]->super.proc_name.jobid);
643+
rc = opal_dss.pack(buf, &nspace, 1, OPAL_STRING);
644+
if(rc != OPAL_SUCCESS) {
645+
OMPI_ERROR_LOG(rc);
646+
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
647+
return rc;
648+
}
649+
/* pack architecture flag */
638650
rc = opal_dss.pack(buf, &(proclist[i]->super.proc_arch), 1, OPAL_UINT32);
639651
if(rc != OPAL_SUCCESS) {
640652
OMPI_ERROR_LOG(rc);
641653
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
642654
return rc;
643655
}
656+
/* pass the name of the host this proc is on */
644657
rc = opal_dss.pack(buf, &(proclist[i]->super.proc_hostname), 1, OPAL_STRING);
645658
if(rc != OPAL_SUCCESS) {
646659
OMPI_ERROR_LOG(rc);
@@ -720,6 +733,7 @@ ompi_proc_unpack(opal_buffer_t* buf,
720733
char *new_hostname;
721734
bool isnew = false;
722735
int rc;
736+
char *nspace;
723737

724738
rc = opal_dss.unpack(buf, &new_name, &count, OMPI_NAME);
725739
if (rc != OPAL_SUCCESS) {
@@ -728,6 +742,15 @@ ompi_proc_unpack(opal_buffer_t* buf,
728742
free(newprocs);
729743
return rc;
730744
}
745+
rc = opal_dss.unpack(buf, &nspace, &count, OPAL_STRING);
746+
if (rc != OPAL_SUCCESS) {
747+
OMPI_ERROR_LOG(rc);
748+
free(plist);
749+
free(newprocs);
750+
return rc;
751+
}
752+
opal_pmix.register_jobid(new_name.jobid, nspace);
753+
free(nspace);
731754
rc = opal_dss.unpack(buf, &new_arch, &count, OPAL_UINT32);
732755
if (rc != OPAL_SUCCESS) {
733756
OMPI_ERROR_LOG(rc);

opal/mca/pmix/cray/pmix_cray.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -821,7 +821,7 @@ static int cray_store_local(const opal_process_name_t *proc,
821821

822822
static const char *cray_get_nspace(opal_jobid_t jobid)
823823
{
824-
return NULL;
824+
return "N/A";
825825
}
826826

827827
static void cray_register_jobid(opal_jobid_t jobid, const char *nspace)

opal/mca/pmix/s1/pmix_s1.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -650,7 +650,7 @@ static int s1_store_local(const opal_process_name_t *proc,
650650

651651
static const char *s1_get_nspace(opal_jobid_t jobid)
652652
{
653-
return NULL;
653+
return "N/A";
654654
}
655655
static void s1_register_jobid(opal_jobid_t jobid, const char *nspace)
656656
{

opal/mca/pmix/s2/pmix_s2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,7 @@ static int s2_store_local(const opal_process_name_t *proc,
669669

670670
static const char *s2_get_nspace(opal_jobid_t jobid)
671671
{
672-
return NULL;
672+
return "N/A";
673673
}
674674
static void s2_register_jobid(opal_jobid_t jobid, const char *nspace)
675675
{

0 commit comments

Comments
 (0)