@@ -58,11 +58,13 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
5858 mca_io_ompio_file_t * shfileHandle , * ompio_fh ;
5959 char * filename_basename ;
6060 char * sm_filename ;
61+ int sm_filename_length ;
6162 struct mca_sharedfp_sm_offset * sm_offset_ptr ;
6263 struct mca_sharedfp_sm_offset sm_offset ;
6364 mca_io_ompio_data_t * data ;
6465 int sm_fd ;
6566 int rank ;
67+ uint32_t comm_cid ;
6668
6769 /*----------------------------------------------------*/
6870 /*Open the same file again without shared file pointer*/
@@ -130,28 +132,21 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
130132 /* the shared memory segment is identified by opening a file
131133 ** and then mapping it to memory.
132134 ** For sharedfp we also want to put the file-backed shared memory into the tmp directory
133- ** TODO: properly name the file so that different jobs can run on the same system w/o
134- ** overwriting each other, e.g. orte_process_info.proc_session_dir
135135 */
136- /*sprintf(sm_filename,"%s%s",filename,".sm");*/
137- filename_basename = basename ((void * )filename );
138- sm_filename = (char * ) malloc ( sizeof (char ) * (strlen (filename_basename )+ 64 ) );
136+ filename_basename = basename (filename );
137+ /* format is "%s/%s_cid-%d.sm", see below */
138+ sm_filename_length = strlen (ompi_process_info .job_session_dir ) + 1 + strlen (filename_basename ) + 5 + (3 * sizeof (uint32_t )+1 ) + 4 ;
139+ sm_filename = (char * ) malloc ( sizeof (char ) * sm_filename_length );
139140 if (NULL == sm_filename ) {
141+ opal_output (0 , "mca_sharedfp_sm_file_open: Error, unable to malloc sm_filename\n" );
140142 free (sm_data );
141143 free (sh );
142144 free (shfileHandle );
143145 return OMPI_ERR_OUT_OF_RESOURCE ;
144146 }
145147
146- opal_jobid_t masterjobid ;
147- if ( 0 == comm -> c_my_rank ) {
148- ompi_proc_t * masterproc = ompi_group_peer_lookup (comm -> c_local_group , 0 );
149- masterjobid = OMPI_CAST_RTE_NAME (& masterproc -> super .proc_name )-> jobid ;
150- }
151- comm -> c_coll -> coll_bcast ( & masterjobid , 1 , MPI_UNSIGNED , 0 , comm ,
152- comm -> c_coll -> coll_bcast_module );
153-
154- sprintf (sm_filename ,"/tmp/OMPIO_%s_%d_%s" ,filename_basename , masterjobid , ".sm" );
148+ comm_cid = ompi_comm_get_cid (comm );
149+ sprintf (sm_filename , "%s/%s_cid-%d.sm" , ompi_process_info .job_session_dir , filename_basename , comm_cid );
155150 /* open shared memory file, initialize to 0, map into memory */
156151 sm_fd = open (sm_filename , O_RDWR | O_CREAT ,
157152 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH );
0 commit comments