Skip to content

Commit 28776c5

Browse files
authored
Merge pull request #7448 from edgargabriel/topic/individual-as-dummy-module
sharedfp/individual: defer the error when the datafile cannot be opened
2 parents 9cfdc38 + df6e3e5 commit 28776c5

File tree

2 files changed

+49
-22
lines changed

2 files changed

+49
-22
lines changed

ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
* University of Stuttgart. All rights reserved.
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
12-
* Copyright (c) 2013-2018 University of Houston. All rights reserved.
12+
* Copyright (c) 2013-2019 University of Houston. All rights reserved.
1313
* Copyright (c) 2015-2018 Research Organization for Information Science
1414
* and Technology (RIST). All rights reserved.
1515
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
@@ -92,11 +92,18 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm,
9292
MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE,
9393
&(MPI_INFO_NULL->super), datafilehandle, false);
9494
if ( OMPI_SUCCESS != err) {
95-
opal_output(0, "mca_sharedfp_individual_file_open: Error during datafile file open\n");
95+
opal_output(ompi_sharedfp_base_framework.framework_output,
96+
"mca_sharedfp_individual_file_open: Error during datafile file open. Continuing anyway. \n");
9697
free (sh);
9798
free (datafilename);
9899
free (datafilehandle);
99-
return err;
100+
101+
// We reset the error code here to OMPI_SUCCESS since the individual component can act as
102+
// a dummy component, in case no sharedfp operations are used by the code. Invoking any write/read
103+
// operations will however lead to an error, since the sharedfp_data pointer will be NULL.
104+
sh = NULL;
105+
err = OMPI_SUCCESS;
106+
goto exit;
100107
}
101108

102109
/*----------------------------------------------------------*/
@@ -113,32 +120,48 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm,
113120
if ( NULL == metadatafilename ) {
114121
free (sh);
115122
free (datafilename);
123+
mca_common_ompio_file_close ( datafilehandle);
116124
free (datafilehandle);
117125
opal_output(0, "mca_sharedfp_individual_file_open: Error during memory allocation\n");
118-
return OMPI_ERR_OUT_OF_RESOURCE;
126+
127+
sh=NULL;
128+
err = OMPI_ERR_OUT_OF_RESOURCE;
129+
goto exit;
119130
}
120131
snprintf ( metadatafilename, len, "%s%s%d", filename, ".metadata.",fh->f_rank);
121132

122133
metadatafilehandle = (ompio_file_t *)malloc(sizeof(ompio_file_t));
123134
if ( NULL == metadatafilehandle ) {
124135
free (sh);
125136
free (datafilename);
137+
mca_common_ompio_file_close ( datafilehandle);
126138
free (datafilehandle);
127139
free (metadatafilename);
128140
opal_output(0, "mca_sharedfp_individual_file_open: Error during memory allocation\n");
129-
return OMPI_ERR_OUT_OF_RESOURCE;
141+
142+
sh = NULL;
143+
err = OMPI_ERR_OUT_OF_RESOURCE;
144+
goto exit;
130145
}
131146
err = mca_common_ompio_file_open ( MPI_COMM_SELF,metadatafilename,
132147
MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE,
133148
&(MPI_INFO_NULL->super), metadatafilehandle, false);
134149
if ( OMPI_SUCCESS != err) {
135-
opal_output(0, "mca_sharedfp_individual_file_open: Error during metadatafile file open\n");
150+
opal_output(ompi_sharedfp_base_framework.framework_output,
151+
"mca_sharedfp_individual_file_open: Error during metadatafile file open. Continuing anyway. \n");
136152
free (sh);
137153
free (datafilename);
154+
mca_common_ompio_file_close ( datafilehandle);
138155
free (datafilehandle);
139156
free (metadatafilename);
140157
free (metadatafilehandle);
141-
return err;
158+
159+
// We reset the error code here to OMPI_SUCCESS since the individual component can act as
160+
// a dummy component, in case no sharedfp operations are used by the code. Invoking any write/read
161+
// operations will however lead to an error, since the sharedfp_data pointer will be NULL.
162+
sh = NULL;
163+
err = OMPI_SUCCESS;
164+
goto exit;
142165
}
143166

144167
/*save the datafilehandle and metadatahandle in the sharedfp individual module data structure*/
@@ -150,6 +173,8 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm,
150173
headnode->metadatafilename = metadatafilename;
151174
}
152175

176+
177+
exit:
153178
/*save the sharedfp individual module data structure in the ompio filehandle structure*/
154179
fh->f_sharedfp_data = sh;
155180

ompi/mca/sharedfp/individual/sharedfp_individual_write.c

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -54,24 +54,26 @@ int mca_sharedfp_individual_write (ompio_file_t *fh,
5454
/*Retrieve data structure for shared file pointer operations*/
5555
sh = fh->f_sharedfp_data;
5656
headnode = (mca_sharedfp_individual_header_record*)sh->selected_module_data;
57+
if ( NULL == headnode) {
58+
opal_output (0, "sharedfp_individual_write_ordered: headnode is NULL but file is open\n");
59+
return OMPI_ERROR;
60+
}
5761

58-
if (headnode) {
59-
/*Insert metadata record into a queue*/
60-
mca_sharedfp_individual_insert_metadata(OMPI_FILE_WRITE_SHARED, totalbytes, sh);
61-
62-
/*Write the data into individual file*/
63-
ret = mca_common_ompio_file_write_at ( headnode->datafilehandle,
64-
headnode->datafile_offset,
65-
buf, count, datatype, status);
66-
if ( OMPI_SUCCESS != ret ) {
67-
opal_output(0,"mca_sharedfp_individual_write: Error while writing the datafile \n");
68-
return -1;
69-
}
70-
71-
/* Update the datafileoffset*/
72-
headnode->datafile_offset = headnode->datafile_offset + totalbytes;
62+
/*Insert metadata record into a queue*/
63+
mca_sharedfp_individual_insert_metadata(OMPI_FILE_WRITE_SHARED, totalbytes, sh);
64+
65+
/*Write the data into individual file*/
66+
ret = mca_common_ompio_file_write_at ( headnode->datafilehandle,
67+
headnode->datafile_offset,
68+
buf, count, datatype, status);
69+
if ( OMPI_SUCCESS != ret ) {
70+
opal_output(0,"mca_sharedfp_individual_write: Error while writing the datafile \n");
71+
return -1;
7372
}
7473

74+
/* Update the datafileoffset*/
75+
headnode->datafile_offset = headnode->datafile_offset + totalbytes;
76+
7577
return ret;
7678
}
7779

0 commit comments

Comments
 (0)