27
27
*/
28
28
29
29
#include "opal_config.h"
30
+ #include "opal/util/show_help.h"
30
31
31
32
#include "btl_vader.h"
32
33
#include "btl_vader_endpoint.h"
@@ -79,6 +80,28 @@ mca_btl_vader_t mca_btl_vader = {
79
80
}
80
81
};
81
82
83
+ /*
84
+ * Exit function copied from btl_usnic_util.c
85
+ *
86
+ * The following comment tells Coverity that this function does not return.
87
+ * See https://scan.coverity.com/tune.
88
+ */
89
+
90
+ /* coverity[+kill] */
91
+ static void vader_btl_exit (mca_btl_vader_t * btl )
92
+ {
93
+ if (NULL != btl && NULL != btl -> error_cb ) {
94
+ btl -> error_cb (& btl -> super , MCA_BTL_ERROR_FLAGS_FATAL ,
95
+ (opal_proc_t * ) opal_proc_local_get (),
96
+ "The vader BTL is aborting the MPI job (via PML error callback)." );
97
+ }
98
+
99
+ /* If the PML error callback returns (or if there wasn't one), just exit. Shrug. */
100
+ fprintf (stderr , "*** The Open MPI vader BTL is aborting the MPI job (via exit(3)).\n" );
101
+ fflush (stderr );
102
+ exit (1 );
103
+ }
104
+
82
105
static int vader_btl_first_time_init (mca_btl_vader_t * vader_btl , int n )
83
106
{
84
107
mca_btl_vader_component_t * component = & mca_btl_vader_component ;
@@ -173,6 +196,7 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
173
196
static int init_vader_endpoint (struct mca_btl_base_endpoint_t * ep , struct opal_proc_t * proc , int remote_rank ) {
174
197
mca_btl_vader_component_t * component = & mca_btl_vader_component ;
175
198
union vader_modex_t * modex ;
199
+ ino_t my_user_ns_id ;
176
200
size_t msg_size ;
177
201
int rc ;
178
202
@@ -197,17 +221,58 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_
197
221
} else {
198
222
#endif
199
223
/* store a copy of the segment information for detach */
200
- ep -> segment_data .other .seg_ds = malloc (msg_size );
224
+ ep -> segment_data .other .seg_ds = malloc (modex -> other . seg_ds_size );
201
225
if (NULL == ep -> segment_data .other .seg_ds ) {
202
226
return OPAL_ERR_OUT_OF_RESOURCE ;
203
227
}
204
228
205
- memcpy (ep -> segment_data .other .seg_ds , & modex -> seg_ds , msg_size );
229
+ memcpy (ep -> segment_data .other .seg_ds , & modex -> other . seg_ds , modex -> other . seg_ds_size );
206
230
207
231
ep -> segment_base = opal_shmem_segment_attach (ep -> segment_data .other .seg_ds );
208
232
if (NULL == ep -> segment_base ) {
209
233
return OPAL_ERROR ;
210
234
}
235
+
236
+ if (MCA_BTL_VADER_CMA == mca_btl_vader_component .single_copy_mechanism ) {
237
+ my_user_ns_id = mca_btl_vader_get_user_ns_id ();
238
+ if (my_user_ns_id != modex -> other .user_ns_id ) {
239
+ mca_base_var_source_t source ;
240
+ int vari ;
241
+ rc = mca_base_var_find_by_name ("btl_vader_single_copy_mechanism" , & vari );
242
+ if (OPAL_ERROR == rc ) {
243
+ return OPAL_ERROR ;
244
+ }
245
+ rc = mca_base_var_get_value (vari , NULL , & source , NULL );
246
+ if (OPAL_ERROR == rc ) {
247
+ return OPAL_ERROR ;
248
+ }
249
+ /*
250
+ * CMA is not possible as different user namespaces are in use.
251
+ * Currently the kernel does not allow process_vm_{read,write}v()
252
+ * for processes running in different user namespaces even if
253
+ * all involved user IDs are mapped to the same user ID.
254
+ *
255
+ * Fall back to MCA_BTL_VADER_EMUL.
256
+ */
257
+ if (MCA_BASE_VAR_SOURCE_DEFAULT != source ) {
258
+ /* If CMA has been explicitly selected we want to error out */
259
+ opal_show_help ("help-btl-vader.txt" , "cma-different-user-namespace-error" ,
260
+ true, opal_process_info .nodename );
261
+ vader_btl_exit (& mca_btl_vader );
262
+ }
263
+ /*
264
+ * If CMA has been selected because it is the default or
265
+ * some fallback, this falls back even further.
266
+ */
267
+ opal_show_help ("help-btl-vader.txt" , "cma-different-user-namespace-warning" ,
268
+ true, opal_process_info .nodename );
269
+ mca_btl_vader_component .single_copy_mechanism = MCA_BTL_VADER_EMUL ;
270
+ mca_btl_vader .super .btl_get = mca_btl_vader_get_sc_emu ;
271
+ mca_btl_vader .super .btl_put = mca_btl_vader_put_sc_emu ;
272
+ mca_btl_vader .super .btl_put_limit = mca_btl_vader .super .btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t );
273
+ mca_btl_vader .super .btl_get_limit = mca_btl_vader .super .btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t );
274
+ }
275
+ }
211
276
#if OPAL_BTL_VADER_HAVE_XPMEM
212
277
}
213
278
#endif
0 commit comments