Skip to content

Commit 6d9455b

Browse files
committed
sharedfp/lockedfile: don't ignore read/write retval
Ensure proper handling of read/write operations in case they are interrupted by a signal, i.e. retry the operation or raise an error. Signed-off-by: Edgar Gabriel <[email protected]>
1 parent 8e5894a commit 6d9455b

File tree

2 files changed

+28
-7
lines changed

2 files changed

+28
-7
lines changed

ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
#include <fcntl.h>
3838
#include <unistd.h>
3939

40+
#include "opal/util/output.h"
41+
4042
int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm,
4143
const char* filename,
4244
int amode,
@@ -125,12 +127,19 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm,
125127
opal_output(0, "[%d]mca_sharedfp_lockedfile_file_open: Error during file open\n",
126128
fh->f_rank);
127129
free (sh);
128-
free(module_data);
130+
free (module_data);
129131
free (lockedfilename);
130132
return OMPI_ERROR;
131133
}
132-
write ( handle, &position, sizeof(OMPI_MPI_OFFSET_TYPE) );
133-
close ( handle );
134+
err = opal_best_effort_write ( handle, &position, sizeof(OMPI_MPI_OFFSET_TYPE) );
135+
if (OPAL_SUCCESS != err ) {
136+
free (sh);
137+
free (module_data);
138+
free (lockedfilename);
139+
close (handle);
140+
return err;
141+
}
142+
close (handle);
134143
}
135144
err = comm->c_coll->coll_barrier ( comm, comm->c_coll->coll_barrier_module );
136145
if ( OMPI_SUCCESS != err ) {

ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_request_position.c

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525
#include "ompi/constants.h"
2626
#include "ompi/mca/sharedfp/sharedfp.h"
2727
#include "ompi/mca/sharedfp/base/base.h"
28+
#include "opal/util/output.h"
29+
#include "opal/util/fd.h"
30+
2831

2932
/*Use fcntl to lock the hidden file which stores the current position*/
3033
#include <fcntl.h>
@@ -76,7 +79,10 @@ int mca_sharedfp_lockedfile_request_position(struct mca_sharedfp_base_data_t * s
7679

7780
/* read from the file */
7881
lseek ( fd, 0, SEEK_SET );
79-
read ( fd, &buf, sizeof(OMPI_MPI_OFFSET_TYPE));
82+
ret = opal_fd_read ( fd, sizeof(OMPI_MPI_OFFSET_TYPE), &buf);
83+
if (OPAL_SUCCESS != ret ) {
84+
goto exit;
85+
}
8086
if ( mca_sharedfp_lockedfile_verbose ) {
8187
opal_output(ompi_sharedfp_base_framework.framework_output,
8288
"sharedfp_lockedfile_request_position: Read last_offset=%lld! ret=%d\n",buf, ret);
@@ -92,8 +98,11 @@ int mca_sharedfp_lockedfile_request_position(struct mca_sharedfp_base_data_t * s
9298

9399
/* write to the file */
94100
lseek ( fd, 0, SEEK_SET );
95-
write ( fd, &position, sizeof(OMPI_MPI_OFFSET_TYPE));
96-
101+
ret = opal_best_effort_write ( fd, &position, sizeof(OMPI_MPI_OFFSET_TYPE));
102+
/* No need to handle error case here, the subsequent steps are identical
103+
in case of ret != OPAL_SUCCESS, namely release lock and return ret */
104+
105+
exit:
97106
/* unlock the file */
98107
if ( mca_sharedfp_lockedfile_verbose ) {
99108
opal_output(ompi_sharedfp_base_framework.framework_output,
@@ -115,7 +124,10 @@ int mca_sharedfp_lockedfile_request_position(struct mca_sharedfp_base_data_t * s
115124
if (fcntl(fd, F_SETLK, &fl) == -1) {
116125
opal_output(0,"sharedfp_lockedfile_request_position:failed to release lock for fd: %d\n",fd);
117126
opal_output(0,"error(%i): %s", errno, strerror(errno));
118-
return OMPI_ERROR;
127+
/* Only overwrite error code if it was OPAL_SUCCESS previously */
128+
if (OPAL_SUCCESS == ret ) {
129+
ret = OMPI_ERROR;
130+
}
119131
}
120132
else {
121133
if ( mca_sharedfp_lockedfile_verbose ) {

0 commit comments

Comments
 (0)