diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c index 89bdf56aa45..fba605e69a3 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c @@ -35,6 +35,7 @@ #include #endif #include +#include int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, const char* filename, @@ -50,6 +51,9 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, mca_io_ompio_file_t * shfileHandle, *ompio_fh; mca_io_ompio_data_t *data; + pid_t my_pid; + int int_pid; + /*------------------------------------------------------------*/ /*Open the same file again without shared file pointer support*/ /*------------------------------------------------------------*/ @@ -110,7 +114,19 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, comm->c_coll->coll_bcast ( &masterjobid, 1, MPI_UNSIGNED, 0, comm, comm->c_coll->coll_bcast_module ); - size_t filenamelen = strlen(filename) + 16; + if ( 0 == fh->f_rank ) { + my_pid = getpid(); + int_pid = (int) my_pid; + } + err = comm->c_coll->coll_bcast (&int_pid, 1, MPI_INT, 0, comm, comm->c_coll->coll_bcast_module ); + if ( OMPI_SUCCESS != err ) { + opal_output(0, "[%d]mca_sharedfp_lockedfile_file_open: Error in bcast operation\n", fh->f_rank); + free (sh); + free(module_data); + return err; + } + + size_t filenamelen = strlen(filename) + 24; lockedfilename = (char*)malloc(sizeof(char) * filenamelen); if ( NULL == lockedfilename ) { free (shfileHandle); @@ -118,7 +134,7 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, free (module_data); return OMPI_ERR_OUT_OF_RESOURCE; } - snprintf(lockedfilename, filenamelen, "%s-%u%s",filename,masterjobid,".lock"); + snprintf(lockedfilename, filenamelen, "%s-%u-%d%s",filename,masterjobid,int_pid,".lock"); module_data->filename = lockedfilename; /*-------------------------------------------------*/ diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index ab7ffdd77f9..3b3cd6e9792 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -44,7 +44,7 @@ #include #include #include - +#include int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, const char* filename, @@ -65,7 +65,9 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, int sm_fd; int rank; uint32_t comm_cid; - + int int_pid; + pid_t my_pid; + /*----------------------------------------------------*/ /*Open the same file again without shared file pointer*/ /*----------------------------------------------------*/ @@ -134,7 +136,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, ** For sharedfp we also want to put the file backed shared memory into the tmp directory */ filename_basename = basename(filename); - /* format is "%s/%s_cid-%d.sm", see below */ + /* format is "%s/%s_cid-%d-%d.sm", see below */ sm_filename_length = strlen(ompi_process_info.job_session_dir) + 1 + strlen(filename_basename) + 5 + (3*sizeof(uint32_t)+1) + 4; sm_filename = (char*) malloc( sizeof(char) * sm_filename_length); if (NULL == sm_filename) { @@ -146,7 +148,21 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, } comm_cid = ompi_comm_get_cid(comm); - sprintf(sm_filename, "%s/%s_cid-%d.sm", ompi_process_info.job_session_dir, filename_basename, comm_cid); + if ( 0 == fh->f_rank ) { + my_pid = getpid(); + int_pid = (int) my_pid; + } + err = comm->c_coll->coll_bcast (&int_pid, 1, MPI_INT, 0, comm, comm->c_coll->coll_bcast_module ); + if ( OMPI_SUCCESS != err ) { + opal_output(0,"mca_sharedfp_sm_file_open: Error in bcast operation \n"); + free(sm_filename); + free(sm_data); + free(sh); + return err; + } + snprintf(sm_filename, sm_filename_length, "%s/%s_cid-%d-%d.sm", ompi_process_info.job_session_dir, + filename_basename, comm_cid, int_pid); + /* open shared memory file, initialize to 0, map into memory */ sm_fd = open(sm_filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);