22 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33 * University Research and Technology
44 * Corporation. All rights reserved.
5- * Copyright (c) 2004-2014 The University of Tennessee and The University
5+ * Copyright (c) 2004-2015 The University of Tennessee and The University
66 * of Tennessee Research Foundation. All rights
77 * reserved.
88 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -80,13 +80,13 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count,
8080 struct ompi_communicator_t * comm ,
8181 mca_coll_base_module_t * module )
8282{
83- int err , i , rank , root = 0 , rsize ;
83+ int err , i , rank , root = 0 , rsize , line ;
8484 ptrdiff_t lb , extent ;
8585 ptrdiff_t true_lb , true_extent ;
8686 char * tmpbuf = NULL , * pml_buffer = NULL ;
8787 ompi_request_t * req [2 ];
8888 mca_coll_basic_module_t * basic_module = (mca_coll_basic_module_t * ) module ;
89- ompi_request_t * * reqs = basic_module -> mccb_reqs ;
89+ ompi_request_t * * reqs = NULL ;
9090
9191 rank = ompi_comm_rank (comm );
9292 rsize = ompi_comm_remote_size (comm );
@@ -111,41 +111,33 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count,
111111 }
112112
113113 tmpbuf = (char * ) malloc (true_extent + (count - 1 ) * extent );
114- if (NULL == tmpbuf ) {
115- return OMPI_ERR_OUT_OF_RESOURCE ;
116- }
114+ if (NULL == tmpbuf ) { err = OMPI_ERR_OUT_OF_RESOURCE ; line = __LINE__ ; goto exit ; }
117115 pml_buffer = tmpbuf - true_lb ;
118116
117+ reqs = mca_coll_basic_get_reqs (basic_module , rsize - 1 );
118+ if ( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE ; line = __LINE__ ; goto exit ; }
119+
119120 /* Do a send-recv between the two root procs. to avoid deadlock */
120121 err = MCA_PML_CALL (irecv (rbuf , count , dtype , 0 ,
121122 MCA_COLL_BASE_TAG_ALLREDUCE , comm ,
122123 & (req [0 ])));
123- if (OMPI_SUCCESS != err ) {
124- goto exit ;
125- }
124+ if (OMPI_SUCCESS != err ) { line = __LINE__ ; goto exit ; }
126125
127126 err = MCA_PML_CALL (isend (sbuf , count , dtype , 0 ,
128127 MCA_COLL_BASE_TAG_ALLREDUCE ,
129128 MCA_PML_BASE_SEND_STANDARD ,
130129 comm , & (req [1 ])));
131- if (OMPI_SUCCESS != err ) {
132- goto exit ;
133- }
130+ if (OMPI_SUCCESS != err ) { line = __LINE__ ; goto exit ; }
134131
135132 err = ompi_request_wait_all (2 , req , MPI_STATUSES_IGNORE );
136- if (OMPI_SUCCESS != err ) {
137- goto exit ;
138- }
139-
133+ if (OMPI_SUCCESS != err ) { line = __LINE__ ; goto exit ; }
140134
141135 /* Loop receiving and calling reduction function (C or Fortran). */
142136 for (i = 1 ; i < rsize ; i ++ ) {
143137 err = MCA_PML_CALL (recv (pml_buffer , count , dtype , i ,
144138 MCA_COLL_BASE_TAG_ALLREDUCE , comm ,
145139 MPI_STATUS_IGNORE ));
146- if (MPI_SUCCESS != err ) {
147- goto exit ;
148- }
140+ if (OMPI_SUCCESS != err ) { line = __LINE__ ; goto exit ; }
149141
150142 /* Perform the reduction */
151143 ompi_op_reduce (op , pml_buffer , rbuf , count , dtype );
@@ -155,9 +147,7 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count,
155147 err = MCA_PML_CALL (send (sbuf , count , dtype , root ,
156148 MCA_COLL_BASE_TAG_ALLREDUCE ,
157149 MCA_PML_BASE_SEND_STANDARD , comm ));
158- if (OMPI_SUCCESS != err ) {
159- goto exit ;
160- }
150+ if (OMPI_SUCCESS != err ) { line = __LINE__ ; goto exit ; }
161151 }
162152
163153
@@ -171,21 +161,16 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count,
171161 err = MCA_PML_CALL (irecv (pml_buffer , count , dtype , 0 ,
172162 MCA_COLL_BASE_TAG_ALLREDUCE ,
173163 comm , & (req [1 ])));
174- if (OMPI_SUCCESS != err ) {
175- goto exit ;
176- }
164+ if (OMPI_SUCCESS != err ) { line = __LINE__ ; goto exit ; }
177165
178166 err = MCA_PML_CALL (isend (rbuf , count , dtype , 0 ,
179167 MCA_COLL_BASE_TAG_ALLREDUCE ,
180168 MCA_PML_BASE_SEND_STANDARD , comm ,
181169 & (req [0 ])));
182- if (OMPI_SUCCESS != err ) {
183- goto exit ;
184- }
170+ if (OMPI_SUCCESS != err ) { line = __LINE__ ; goto exit ; }
171+
185172 err = ompi_request_wait_all (2 , req , MPI_STATUSES_IGNORE );
186- if (OMPI_SUCCESS != err ) {
187- goto exit ;
188- }
173+ if (OMPI_SUCCESS != err ) { line = __LINE__ ; goto exit ; }
189174
190175 /* distribute the data to other processes in remote group.
191176 * Note that we start from 1 (not from zero), since zero
@@ -198,17 +183,13 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count,
198183 MCA_COLL_BASE_TAG_ALLREDUCE ,
199184 MCA_PML_BASE_SEND_STANDARD , comm ,
200185 & reqs [i - 1 ]));
201- if (OMPI_SUCCESS != err ) {
202- goto exit ;
203- }
186+ if (OMPI_SUCCESS != err ) { line = __LINE__ ; goto exit ; }
204187 }
205188
206189 err =
207190 ompi_request_wait_all (rsize - 1 , reqs ,
208191 MPI_STATUSES_IGNORE );
209- if (OMPI_SUCCESS != err ) {
210- goto exit ;
211- }
192+ if (OMPI_SUCCESS != err ) { line = __LINE__ ; goto exit ; }
212193 }
213194 } else {
214195 err = MCA_PML_CALL (recv (rbuf , count , dtype , root ,
@@ -217,10 +198,14 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count,
217198 }
218199
219200 exit :
201+ if ( MPI_SUCCESS != err ) {
202+ OPAL_OUTPUT ((ompi_coll_base_framework .framework_output ,"%s:%4d\tError occurred %d, rank %2d" , __FILE__ ,
203+ line , err , rank ));
204+ mca_coll_basic_free_reqs (reqs , rsize - 1 );
205+ }
220206 if (NULL != tmpbuf ) {
221207 free (tmpbuf );
222208 }
223209
224-
225210 return err ;
226211}
0 commit comments