4343#include <semaphore.h>
4444#include <sys/mman.h>
4545#include <libgen.h>
46-
46+ #include <unistd.h>
4747
4848int mca_sharedfp_sm_file_open (struct ompi_communicator_t * comm ,
4949 const char * filename ,
@@ -57,12 +57,16 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
5757 mca_io_ompio_file_t * shfileHandle , * ompio_fh ;
5858 char * filename_basename ;
5959 char * sm_filename ;
60+ int sm_filename_length ;
6061 struct mca_sharedfp_sm_offset * sm_offset_ptr ;
6162 struct mca_sharedfp_sm_offset sm_offset ;
6263 mca_io_ompio_data_t * data ;
6364 int sm_fd ;
6465 int rank ;
65-
66+ uint32_t comm_cid ;
67+ int int_pid ;
68+ pid_t my_pid ;
69+
6670 /*----------------------------------------------------*/
6771 /*Open the same file again without shared file pointer*/
6872 /*----------------------------------------------------*/
@@ -132,25 +136,33 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
132136 ** TODO: properly name the file so that different jobs can run on the same system w/o
133137 ** overwriting each other, e.g. orte_process_info.proc_session_dir
134138 */
135- /*sprintf(sm_filename,"%s%s",filename,".sm");*/
136- filename_basename = basename ((void * )filename );
137- sm_filename = (char * ) malloc ( sizeof (char ) * (strlen (filename_basename )+ 64 ) );
139+ filename_basename = basename ((char * )filename );
140+ /* format is "%s/%s_cid-%d-%d.sm", see below */
141+ sm_filename_length = strlen (ompi_process_info .job_session_dir ) + 1 + strlen (filename_basename ) + 5 + (3 * sizeof (uint32_t )+1 ) + 4 ;
142+ sm_filename = (char * ) malloc ( sizeof (char ) * sm_filename_length );
138143 if (NULL == sm_filename ) {
139144 free (sm_data );
140145 free (sh );
141146 free (shfileHandle );
142147 return OMPI_ERR_OUT_OF_RESOURCE ;
143148 }
144149
145- opal_jobid_t masterjobid ;
146- if ( 0 == comm -> c_my_rank ) {
147- ompi_proc_t * masterproc = ompi_group_peer_lookup (comm -> c_local_group , 0 );
148- masterjobid = OMPI_CAST_RTE_NAME (& masterproc -> super .proc_name )-> jobid ;
150+ comm_cid = ompi_comm_get_cid (comm );
151+ if ( 0 == fh -> f_rank ) {
152+ my_pid = getpid ();
153+ int_pid = (int ) my_pid ;
154+ }
155+ err = comm -> c_coll -> coll_bcast (& int_pid , 1 , MPI_INT , 0 , comm , comm -> c_coll -> coll_bcast_module );
156+ if ( OMPI_SUCCESS != err ) {
157+ opal_output (0 ,"mca_sharedfp_sm_file_open: Error in bcast operation \n" );
158+ free (sm_filename );
159+ free (sm_data );
160+ free (sh );
161+ return err ;
149162 }
150- comm -> c_coll -> coll_bcast ( & masterjobid , 1 , MPI_UNSIGNED , 0 , comm ,
151- comm -> c_coll -> coll_bcast_module );
163+ snprintf ( sm_filename , sm_filename_length , "%s/%s_cid-%d-%d.sm" , ompi_process_info . job_session_dir ,
164+ filename_basename , comm_cid , int_pid );
152165
153- sprintf (sm_filename ,"/tmp/OMPIO_%s_%d_%s" ,filename_basename , masterjobid , ".sm" );
154166 /* open shared memory file, initialize to 0, map into memory */
155167 sm_fd = open (sm_filename , O_RDWR | O_CREAT ,
156168 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH );
0 commit comments