|
22 | 22 | * Copyright (c) 2014-2018 Research Organization for Information Science |
23 | 23 | * and Technology (RIST). All rights reserved. |
24 | 24 | * Copyright (c) 2014 Bull SAS. All rights reserved. |
| 25 | + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. |
25 | 26 | * $COPYRIGHT$ |
26 | 27 | * |
27 | 28 | * Additional copyrights may follow |
@@ -278,9 +279,6 @@ static int btl_openib_modex_send(void) |
278 | 279 | ); |
279 | 280 | /* For each module, add in the size of the per-CPC data */ |
280 | 281 | for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) { |
281 | | - if (! mca_btl_openib_component.openib_btls[i]->allowed) { |
282 | | - continue; |
283 | | - } |
284 | 282 | for (j = 0; |
285 | 283 | j < mca_btl_openib_component.openib_btls[i]->num_cpcs; |
286 | 284 | ++j) { |
@@ -309,9 +307,6 @@ static int btl_openib_modex_send(void) |
309 | 307 | /* Pack each of the modules */ |
310 | 308 | for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) { |
311 | 309 |
|
312 | | - if (! mca_btl_openib_component.openib_btls[i]->allowed) { |
313 | | - continue; |
314 | | - } |
315 | 310 | /* Pack the modex common message struct. */ |
316 | 311 | size = modex_message_size; |
317 | 312 |
|
@@ -633,38 +628,26 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, |
633 | 628 | * unless the user specifically requested to override this |
634 | 629 | * policy. For ancient OFED, only allow if user has set |
635 | 630 | * the MCA parameter. |
| 631 | + * |
| 632 | + * We emit a help message when Open MPI was configured without |
| 633 | + * UCX support and the port uses InfiniBand as its link |
| 634 | + * layer. If UCX support is available, we do not emit the help message, |
| 635 | + * since the UCX PML has higher priority than OB1 and this BTL will |
| 636 | + * not be used. |
636 | 637 | */ |
637 | | - if (! mca_btl_openib_component.allow_ib |
| 638 | + if (false == mca_btl_openib_component.allow_ib |
638 | 639 | #if HAVE_DECL_IBV_LINK_LAYER_ETHERNET |
639 | 640 | && IBV_LINK_LAYER_INFINIBAND == ib_port_attr->link_layer |
640 | 641 | #endif |
641 | 642 | ) { |
642 | | - openib_btl = (mca_btl_openib_module_t *) calloc(1, sizeof(mca_btl_openib_module_t)); |
643 | | - if(NULL == openib_btl) { |
644 | | - BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__)); |
645 | | - return OPAL_ERR_OUT_OF_RESOURCE; |
646 | | - } |
647 | | - memcpy(openib_btl, &mca_btl_openib_module, |
648 | | - sizeof(mca_btl_openib_module)); |
649 | | - ib_selected = OBJ_NEW(mca_btl_base_selected_module_t); |
650 | | - ib_selected->btl_module = (mca_btl_base_module_t*) openib_btl; |
651 | | - openib_btl->port_num = (uint8_t) port_num; |
652 | | - openib_btl->allowed = false; |
653 | | - openib_btl->device = NULL; |
654 | | - openib_btl->device_name = strdup(ibv_get_device_name(device->ib_dev)); |
655 | | - OBJ_CONSTRUCT(&openib_btl->ib_lock, opal_mutex_t); |
656 | | - opal_list_append(btl_list, (opal_list_item_t*) ib_selected); |
657 | | - opal_pointer_array_add(device->device_btls, (void*) openib_btl); |
658 | | - ++device->btls; |
659 | | - ++mca_btl_openib_component.ib_num_btls; |
660 | | - if (-1 != mca_btl_openib_component.ib_max_btls && |
661 | | - mca_btl_openib_component.ib_num_btls >= |
662 | | - mca_btl_openib_component.ib_max_btls) { |
663 | | - return OPAL_ERR_VALUE_OUT_OF_BOUNDS; |
664 | | - } |
665 | | - return OPAL_SUCCESS; |
666 | | - } |
667 | | - |
| 643 | +#if !HAVE_UCX |
| 644 | + opal_show_help("help-mpi-btl-openib.txt", "ib port not selected", |
| 645 | + true, opal_process_info.nodename, |
| 646 | + ibv_get_device_name(device->ib_dev), |
| 647 | + port_num); |
| 648 | +#endif |
| 649 | + return OPAL_ERR_NOT_FOUND; |
| 650 | + } |
668 | 651 |
|
669 | 652 | /* Ensure that the requested GID index (via the |
670 | 653 | btl_openib_gid_index MCA param) is within the GID table |
@@ -901,8 +884,6 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, |
901 | 884 | } |
902 | 885 | } |
903 | 886 |
|
904 | | - openib_btl->allowed = true; |
905 | | - |
906 | 887 | opal_list_append(btl_list, (opal_list_item_t*) ib_selected); |
907 | 888 | opal_pointer_array_add(device->device_btls, (void*) openib_btl); |
908 | 889 | ++device->btls; |
@@ -2999,29 +2980,27 @@ btl_openib_component_init(int *num_btl_modules, |
2999 | 2980 | ib_selected = (mca_btl_base_selected_module_t*)item; |
3000 | 2981 | openib_btl = (mca_btl_openib_module_t*)ib_selected->btl_module; |
3001 | 2982 |
|
3002 | | - if (openib_btl->allowed) { |
3003 | | - /* Search for a CPC that can handle this port */ |
3004 | | - ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl); |
3005 | | - /* If we get NOT_SUPPORTED, then no CPC was found for this |
3006 | | - port. But that's not a fatal error -- just keep going; |
3007 | | - let's see if we find any usable openib modules or not. */ |
3008 | | - if (OPAL_ERR_NOT_SUPPORTED == ret) { |
3009 | | - continue; |
3010 | | - } else if (OPAL_SUCCESS != ret) { |
3011 | | - /* All others *are* fatal. Note that we already did a |
3012 | | - show_help in the lower layer */ |
3013 | | - goto no_btls; |
3014 | | - } |
| 2983 | + /* Search for a CPC that can handle this port */ |
| 2984 | + ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl); |
| 2985 | + /* If we get NOT_SUPPORTED, then no CPC was found for this |
| 2986 | + port. But that's not a fatal error -- just keep going; |
| 2987 | + let's see if we find any usable openib modules or not. */ |
| 2988 | + if (OPAL_ERR_NOT_SUPPORTED == ret) { |
| 2989 | + continue; |
| 2990 | + } else if (OPAL_SUCCESS != ret) { |
| 2991 | + /* All others *are* fatal. Note that we already did a |
| 2992 | + show_help in the lower layer */ |
| 2993 | + goto no_btls; |
| 2994 | + } |
3015 | 2995 |
|
3016 | | - if (mca_btl_openib_component.max_hw_msg_size > 0 && |
3017 | | - (uint32_t)mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) { |
3018 | | - BTL_ERROR(("max_hw_msg_size (%" PRIu32 ") is larger than hw max message size (%" PRIu32 ")", |
3019 | | - mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz)); |
3020 | | - } |
| 2996 | + if (mca_btl_openib_component.max_hw_msg_size > 0 && |
| 2997 | + (uint32_t)mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) { |
| 2998 | + BTL_ERROR(("max_hw_msg_size (%" PRIu32 ") is larger than hw max message size (%" PRIu32 ")", |
| 2999 | + mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz)); |
| 3000 | + } |
3021 | 3001 |
|
3022 | | - if (finish_btl_init(openib_btl) != OPAL_SUCCESS) { |
3023 | | - goto no_btls; |
3024 | | - } |
| 3002 | + if (finish_btl_init(openib_btl) != OPAL_SUCCESS) { |
| 3003 | + goto no_btls; |
3025 | 3004 | } |
3026 | 3005 |
|
3027 | 3006 | mca_btl_openib_component.openib_btls[i] = openib_btl; |
|
0 commit comments