@@ -590,25 +590,6 @@ static void free_filter(usnic_if_filter_t *filter)
590590 free (filter );
591591}
592592
593- static int do_fi_getinfo (uint32_t version , struct fi_info * * info_list )
594- {
595- struct fi_info hints = {0 };
596- struct fi_ep_attr ep_attr = {0 };
597- struct fi_fabric_attr fabric_attr = {0 };
598-
599- /* We only want providers named "usnic" that are of type EP_DGRAM */
600- fabric_attr .prov_name = "usnic" ;
601- ep_attr .type = FI_EP_DGRAM ;
602-
603- hints .caps = FI_MSG ;
604- hints .mode = FI_LOCAL_MR | FI_MSG_PREFIX ;
605- hints .addr_format = FI_SOCKADDR ;
606- hints .ep_attr = & ep_attr ;
607- hints .fabric_attr = & fabric_attr ;
608-
609- return fi_getinfo (version , NULL , 0 , 0 , & hints , info_list );
610- }
611-
612593/*
613594 * UD component initialization:
614595 * (1) read interface list from kernel and compare against component
@@ -652,62 +633,93 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
652633
653634 OBJ_CONSTRUCT (& btl_usnic_lock , opal_recursive_mutex_t );
654635
655- /* This code understands libfabric API versions v1.0, v1.1, and
656- v1.4. Even if we were compiled with libfabric API v1.0, we
657- still want to request v1.1 -- here's why:
658-
659- - In libfabric v1.0.0 (i.e., API v1.0), the usnic provider did
660- not check the value of the "version" parameter passed into
661- fi_getinfo()
662-
663- - If you pass FI_VERSION(1,0) to libfabric v1.1.0 (i.e., API
664- v1.1), the usnic provider will disable FI_MSG_PREFIX support
665- (on the assumption that the application will not handle
666- FI_MSG_PREFIX properly). This can happen if you compile OMPI
667- against libfabric v1.0.0 (i.e., API v1.0) and run OMPI
668- against libfabric v1.1.0 (i.e., API v1.1).
669-
670- So never request API v1.0 -- always request a minimum of
671- v1.1.
672-
673- The usnic provider changed the strings in the fabric and domain
674- names in API v1.4. With API <= v1.3:
636+ /* There are multiple dimensions to consider when requesting an
637+ API version number from libfabric:
638+
639+ 1. This code understands libfabric API versions v1.3 through
640+ v1.4.
641+
642+ 2. Open MPI may be *compiled* against one version of libfabric,
643+ but may be *running* with another.
644+
645+ 3. There were usnic-specific bugs in Libfabric prior to
646+ libfabric v1.3.0 (where "v1.3.0" is the tarball/package
647+ version, not the API version; but happily, the API version
648+ was also 1.3 in Libfabric v1.3.0):
649+
650+ - In libfabric v1.0.0 (i.e., API v1.0), the usnic provider
651+ did not check the value of the "version" parameter passed
652+ into fi_getinfo()
653+ - If you pass FI_VERSION(1,0) to libfabric v1.1.0 (i.e., API
654+ v1.1), the usnic provider will disable FI_MSG_PREFIX
655+ support (on the assumption that the application will not
656+ handle FI_MSG_PREFIX properly). This can happen if you
657+ compile OMPI against libfabric v1.0.0 (i.e., API v1.0) and
658+ run OMPI against libfabric v1.1.0 (i.e., API v1.1).
659+ - Some critical AV bug fixes were included in libfabric
660+ v1.3.0; prior versions can fail in fi_av_* operations in
661+ unexpected ways (libnl: you win again!).
662+
663+ So always request a minimum API version of v1.3.
664+
665+ Note that the FI_MAJOR_VERSION and FI_MINOR_VERSION in
666+ <rdma/fabric.h> represent the API version, not the Libfabric
667+ package (i.e., tarball) version. As of Libfabric v1.3, there
668+ is currently no way to know a) what package version of
669+ Libfabric you were compiled against, and b) what package
670+ version of Libfabric you are running with.
671+
672+ Also note that the usnic provider changed the strings in the
673+ fabric and domain names in API v1.4. With API <= v1.3:
675674
676675 - fabric name is "usnic_X" (device name)
677676 - domain name is NULL
678677
679- With libfabric API >= v1.4:
678+ With libfabric API >= v1.4, all Libfabric IP-based providers
679+ (including usnic) follow the same convention:
680680
681681 - fabric name is "a.b.c.d/e" (CIDR notation of network)
682682 - domain name is "usnic_X" (device name)
683683
684684 NOTE: The configure.m4 in this component will require libfabric
685- >= v1.1.0 (i.e., it won't accept v1.0.0) because of a critical
686- bug in the usnic provider in libfabric v1.0.0. However, the
687- compatibility code with libfabric v1.0.0 in the usNIC BTL has
688- been retained, for two reasons:
689-
690- 1. It's not harmful, nor overly complicated. So the
691- compatibility code was not ripped out.
692- 2. At least some versions of Cisco Open MPI are shipping with
693- an embedded (libfabric v1.0.0+critical bug fix).
685+ >= v1.1.0 (i.e., it won't accept v1.0.0) because it needs
686+ access to the usNIC extension header structures that only
687+ became available in v1.1.0.*/
694688
695- Someday, #2 may no longer be true, and we may therefore rip out
696- the libfabric v1.0.0 compatibility code. */
697-
698- /* First try API version 1.4. If that doesn't work, try API
699- version 1.1. */
689+ /* First, check to see if the libfabric we are running with is <=
690+ libfabric v1.3. If so, don't bother going further. */
700691 uint32_t libfabric_api ;
701- libfabric_api = FI_VERSION (1 , 4 );
702- ret = do_fi_getinfo (libfabric_api , & info_list );
703- // Libfabric core will return -FI_ENOSYS if it is too old
704- if (- FI_ENOSYS == ret ) {
705- libfabric_api = FI_VERSION (1 , 1 );
706- ret = do_fi_getinfo (libfabric_api , & info_list );
692+ libfabric_api = fi_version ();
693+ if (libfabric_api < FI_VERSION (1 , 3 )) {
694+ opal_output_verbose (5 , USNIC_OUT ,
695+ "btl:usnic: disqualifiying myself because Libfabric does not support v1.3 of the API (v1.3 is *required* for correct usNIC functionality)." );
696+ return NULL ;
697+ }
698+
699+ /* Libfabric API 1.3 is fine. Above that, we know that Open MPI
700+ works with libfabric API v1.4, so just use that. */
701+ if (libfabric_api > FI_VERSION (1 , 3 )) {
702+ libfabric_api = FI_VERSION (1 , 4 );
707703 }
704+
705+ struct fi_info hints = {0 };
706+ struct fi_ep_attr ep_attr = {0 };
707+ struct fi_fabric_attr fabric_attr = {0 };
708+
709+ /* We only want providers named "usnic" that are of type EP_DGRAM */
710+ fabric_attr .prov_name = "usnic" ;
711+ ep_attr .type = FI_EP_DGRAM ;
712+
713+ hints .caps = FI_MSG ;
714+ hints .mode = FI_LOCAL_MR | FI_MSG_PREFIX ;
715+ hints .addr_format = FI_SOCKADDR ;
716+ hints .ep_attr = & ep_attr ;
717+ hints .fabric_attr = & fabric_attr ;
718+
719+ ret = fi_getinfo (libfabric_api , NULL , 0 , 0 , & hints , & info_list );
708720 if (0 != ret ) {
709721 opal_output_verbose (5 , USNIC_OUT ,
710- "btl:usnic: disqualifiying myself due to fi_getinfo failure: %s (%d)" , strerror (- ret ), ret );
722+ "btl:usnic: disqualifiying myself due to fi_getinfo(3) failure: %s (%d)" , strerror (- ret ), ret );
711723 return NULL ;
712724 }
713725
@@ -738,29 +750,6 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
738750 opal_output_verbose (5 , USNIC_OUT ,
739751 "btl:usnic: usNIC fabrics found" );
740752
741- /* Due to ambiguities in documentation, in libfabric v1.0.0 (i.e.,
742- API v1.0) the usnic provider returned sizeof(struct
743- fi_cq_err_entry) from fi_cq_readerr() upon success.
744-
745- The ambiguities were clarified in libfabric v1.1.0 (i.e., API
746- v1.1); the usnic provider returned 1 from fi_cq_readerr() upon
747- success.
748-
749- So query to see what version of the libfabric API we are
750- running with, and adapt accordingly. */
751- libfabric_api = fi_version ();
752- if (1 == FI_MAJOR (libfabric_api ) &&
753- 0 == FI_MINOR (libfabric_api )) {
754- // Old fi_cq_readerr() behavior: success=sizeof(...), try again=0
755- mca_btl_usnic_component .cq_readerr_success_value =
756- sizeof (struct fi_cq_err_entry );
757- mca_btl_usnic_component .cq_readerr_try_again_value = 0 ;
758- } else {
759- // New fi_cq_readerr() behavior: success=1, try again=-FI_EAGAIN
760- mca_btl_usnic_component .cq_readerr_success_value = 1 ;
761- mca_btl_usnic_component .cq_readerr_try_again_value = - FI_EAGAIN ;
762- }
763-
764753 opal_proc_t * me = opal_proc_local_get ();
765754 opal_process_name_t * name = & (me -> proc_name );
766755 mca_btl_usnic_component .my_hashed_rte_name =
@@ -1256,12 +1245,11 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
12561245 }
12571246
12581247 rc = fi_cq_readerr (channel -> cq , & err_entry , 0 );
1259- if (rc == mca_btl_usnic_component . cq_readerr_try_again_value ) {
1248+ if (rc == - FI_EAGAIN ) {
12601249 return ;
1261- } else if (rc != mca_btl_usnic_component .cq_readerr_success_value ) {
1262- BTL_ERROR (("%s: cq_readerr ret = %d (expected %d)" ,
1263- module -> linux_device_name , rc ,
1264- (int ) mca_btl_usnic_component .cq_readerr_success_value ));
1250+ } else if (rc != 1 ) {
1251+ BTL_ERROR (("%s: cq_readerr ret = %d (expected 1)" ,
1252+ module -> linux_device_name , rc ));
12651253 channel -> chan_error = true;
12661254 }
12671255
0 commit comments