Skip to content

Commit 6178e43

Browse files
committed
common/ompio and sharedfp/lockedfile: store full path
This fix is based on a bug report on the mailing list. If the user opens a file in a directory but executes a chdir before closing the file, ompio was unable to properly clean up the lock files generated by the lockedfile component. After some investigation it was confirmed that the same problem arises if a user performs the same sequence of operations using MPI_MODE_DELETE_ON_CLOSE; hence the fix also had to be applied to the general file handle, not just the sharedfp component. Note: this fix should apply cleanly to 5.0, but I am not entirely sure whether we'll have to generate a separate patch for 4.1 and 4.0, since the common_ompio data structure has changed and gained an additional element. Fixes issue #9924. Signed-off-by: Edgar Gabriel <[email protected]> Incorporate changes requested during the review. Signed-off-by: Edgar Gabriel <[email protected]>
1 parent 6d9455b commit 6178e43

File tree

3 files changed

+49
-8
lines changed

3 files changed

+49
-8
lines changed

ompi/mca/common/ompio/common_ompio.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ struct ompio_file_t {
158158
int f_perm;
159159
ompi_communicator_t *f_comm;
160160
const char *f_filename;
161+
char *f_fullfilename;
161162
char *f_datarep;
162163
opal_convertor_t *f_mem_convertor;
163164
opal_convertor_t *f_file_convertor;

ompi/mca/common/ompio/common_ompio_file_open.c

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@
4141
#include <math.h>
4242
#include "common_ompio.h"
4343
#include "ompi/mca/topo/topo.h"
44+
#include "opal/util/opal_getcwd.h"
45+
#include "opal/util/path.h"
46+
#include "opal/util/os_path.h"
4447

4548
static mca_common_ompio_generate_current_file_view_fn_t generate_current_file_view_fn;
4649
static mca_common_ompio_get_mca_parameter_value_fn_t get_mca_parameter_value_fn;
@@ -100,6 +103,22 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm,
100103
ompio_fh->f_get_mca_parameter_value=get_mca_parameter_value_fn;
101104

102105
ompio_fh->f_filename = filename;
106+
if (opal_path_is_absolute(filename) ) {
107+
ompio_fh->f_fullfilename = strdup(filename);
108+
}
109+
else {
110+
char path[OPAL_PATH_MAX];
111+
ret = opal_getcwd(path, OPAL_PATH_MAX);
112+
if (OPAL_SUCCESS != ret) {
113+
goto fn_fail;
114+
}
115+
ompio_fh->f_fullfilename = opal_os_path(0, path, filename, NULL);
116+
if (NULL == ompio_fh->f_fullfilename){
117+
ret = OMPI_ERROR;
118+
goto fn_fail;
119+
}
120+
}
121+
103122
mca_common_ompio_set_file_defaults (ompio_fh);
104123

105124
ompio_fh->f_split_coll_req = NULL;
@@ -285,7 +304,7 @@ int mca_common_ompio_file_close (ompio_file_t *ompio_fh)
285304
ret = ompio_fh->f_fs->fs_file_close (ompio_fh);
286305
}
287306
if ( delete_flag ) {
288-
ret = mca_common_ompio_file_delete ( ompio_fh->f_filename, &(MPI_INFO_NULL->super) );
307+
ret = mca_common_ompio_file_delete ( ompio_fh->f_fullfilename, &(MPI_INFO_NULL->super) );
289308
}
290309

291310
if ( NULL != ompio_fh->f_fs ) {
@@ -350,7 +369,8 @@ int mca_common_ompio_file_close (ompio_file_t *ompio_fh)
350369
free ( ompio_fh->f_coll_write_time );
351370
ompio_fh->f_coll_write_time = NULL;
352371
}
353-
372+
free (ompio_fh->f_fullfilename);
373+
354374
if ( NULL != ompio_fh->f_coll_read_time ) {
355375
free ( ompio_fh->f_coll_read_time );
356376
ompio_fh->f_coll_read_time = NULL;
@@ -371,8 +391,7 @@ int mca_common_ompio_file_close (ompio_file_t *ompio_fh)
371391
if ( MPI_DATATYPE_NULL != ompio_fh->f_orig_filetype ){
372392
ompi_datatype_destroy (&ompio_fh->f_orig_filetype);
373393
}
374-
375-
394+
376395
if (MPI_COMM_NULL != ompio_fh->f_comm && !(ompio_fh->f_flags & OMPIO_SHAREDFP_IS_SET) ) {
377396
ompi_comm_free (&ompio_fh->f_comm);
378397
}

ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@
3838
#include <unistd.h>
3939

4040
#include "opal/util/output.h"
41+
#include "opal/util/opal_getcwd.h"
42+
#include "opal/util/path.h"
43+
#include "opal/util/os_path.h"
4144

4245
int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm,
4346
const char* filename,
@@ -112,8 +115,26 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm,
112115
return OMPI_ERR_OUT_OF_RESOURCE;
113116
}
114117
snprintf(lockedfilename, filenamelen, "%s-%u-%d%s",filename,masterjobid,int_pid,".lock");
115-
module_data->filename = lockedfilename;
116-
118+
if (opal_path_is_absolute(lockedfilename) ) {
119+
module_data->filename = lockedfilename;
120+
} else {
121+
char path[OPAL_PATH_MAX];
122+
err = opal_getcwd(path, OPAL_PATH_MAX);
123+
if (OPAL_SUCCESS != err) {
124+
free (sh);
125+
free (module_data);
126+
free (lockedfilename);
127+
return err;
128+
}
129+
module_data->filename = opal_os_path(0, path, lockedfilename, NULL);
130+
if (NULL == module_data->filename){
131+
free (sh);
132+
free (module_data);
133+
free (lockedfilename);
134+
return OMPI_ERROR;
135+
}
136+
}
137+
117138
/*-------------------------------------------------*/
118139
/*Open the lockedfile without shared file pointer */
119140
/*-------------------------------------------------*/
@@ -131,8 +152,8 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm,
131152
free (lockedfilename);
132153
return OMPI_ERROR;
133154
}
134-
err = opal_best_effort_write ( handle, &position, sizeof(OMPI_MPI_OFFSET_TYPE) );
135-
if (OPAL_SUCCESS != err ) {
155+
err = opal_best_effort_write (handle, &position, sizeof(OMPI_MPI_OFFSET_TYPE));
156+
if (OPAL_SUCCESS != err) {
136157
free (sh);
137158
free (module_data);
138159
free (lockedfilename);

0 commit comments

Comments
 (0)