2727 */
2828
2929#include "opal_config.h"
30+ #include "opal/util/show_help.h"
3031
3132#include "btl_vader.h"
3233#include "btl_vader_endpoint.h"
@@ -79,6 +80,28 @@ mca_btl_vader_t mca_btl_vader = {
7980 }
8081};
8182
83+ /*
84+ * Exit function copied from btl_usnic_util.c
85+ *
86+ * The following comment tells Coverity that this function does not return.
87+ * See https://scan.coverity.com/tune.
88+ */
89+
90+ /* coverity[+kill] */
91+ static void vader_btl_exit (mca_btl_vader_t * btl )
92+ {
93+ if (NULL != btl && NULL != btl -> error_cb ) {
94+ btl -> error_cb (& btl -> super , MCA_BTL_ERROR_FLAGS_FATAL ,
95+ (opal_proc_t * ) opal_proc_local_get (),
96+ "The vader BTL is aborting the MPI job (via PML error callback)." );
97+ }
98+
99+ /* If the PML error callback returns (or if there wasn't one), just exit. Shrug. */
100+ fprintf (stderr , "*** The Open MPI vader BTL is aborting the MPI job (via exit(3)).\n" );
101+ fflush (stderr );
102+ exit (1 );
103+ }
104+
82105static int vader_btl_first_time_init (mca_btl_vader_t * vader_btl , int n )
83106{
84107 mca_btl_vader_component_t * component = & mca_btl_vader_component ;
@@ -173,6 +196,7 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
173196static int init_vader_endpoint (struct mca_btl_base_endpoint_t * ep , struct opal_proc_t * proc , int remote_rank ) {
174197 mca_btl_vader_component_t * component = & mca_btl_vader_component ;
175198 union vader_modex_t * modex ;
199+ ino_t my_user_ns_id ;
176200 size_t msg_size ;
177201 int rc ;
178202
@@ -197,17 +221,58 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_
197221 } else {
198222#endif
199223 /* store a copy of the segment information for detach */
200- ep -> segment_data .other .seg_ds = malloc (msg_size );
224+ ep -> segment_data .other .seg_ds = malloc (modex -> other . seg_ds_size );
201225 if (NULL == ep -> segment_data .other .seg_ds ) {
202226 return OPAL_ERR_OUT_OF_RESOURCE ;
203227 }
204228
205- memcpy (ep -> segment_data .other .seg_ds , & modex -> seg_ds , msg_size );
229+ memcpy (ep -> segment_data .other .seg_ds , & modex -> other . seg_ds , modex -> other . seg_ds_size );
206230
207231 ep -> segment_base = opal_shmem_segment_attach (ep -> segment_data .other .seg_ds );
208232 if (NULL == ep -> segment_base ) {
209233 return OPAL_ERROR ;
210234 }
235+
236+ if (MCA_BTL_VADER_CMA == mca_btl_vader_component .single_copy_mechanism ) {
237+ my_user_ns_id = mca_btl_vader_get_user_ns_id ();
238+ if (my_user_ns_id != modex -> other .user_ns_id ) {
239+ mca_base_var_source_t source ;
240+ int vari ;
241+ rc = mca_base_var_find_by_name ("btl_vader_single_copy_mechanism" , & vari );
242+ if (OPAL_ERROR == rc ) {
243+ return OPAL_ERROR ;
244+ }
245+ rc = mca_base_var_get_value (vari , NULL , & source , NULL );
246+ if (OPAL_ERROR == rc ) {
247+ return OPAL_ERROR ;
248+ }
249+ /*
250+ * CMA is not possible as different user namespaces are in use.
251+ * Currently the kernel does not allow process_vm_{read,write}v()
252+ * for processes running in different user namespaces even if
253+ * all involved user IDs are mapped to the same user ID.
254+ *
255+ * Fallback to MCA_BTL_VADER_EMUL.
256+ */
257+ if (MCA_BASE_VAR_SOURCE_DEFAULT != source ) {
258+ /* If CMA has been explicitly selected we want to error out */
259+ opal_show_help ("help-btl-vader.txt" , "cma-different-user-namespace-error" ,
260+ true, opal_process_info .nodename );
261+ vader_btl_exit (& mca_btl_vader );
262+ }
263+ /*
264+ * If CMA has been selected because it is the default or
265+ * some fallback, this falls back even further.
266+ */
267+ opal_show_help ("help-btl-vader.txt" , "cma-different-user-namespace-warning" ,
268+ true, opal_process_info .nodename );
269+ mca_btl_vader_component .single_copy_mechanism = MCA_BTL_VADER_EMUL ;
270+ mca_btl_vader .super .btl_get = mca_btl_vader_get_sc_emu ;
271+ mca_btl_vader .super .btl_put = mca_btl_vader_put_sc_emu ;
272+ mca_btl_vader .super .btl_put_limit = mca_btl_vader .super .btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t );
273+ mca_btl_vader .super .btl_get_limit = mca_btl_vader .super .btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t );
274+ }
275+ }
211276#if OPAL_BTL_VADER_HAVE_XPMEM
212277 }
213278#endif
0 commit comments