Skip to content

Commit 8d3ee0b

Browse files
committed
Update libfabric version to 1.21
This should resolve provider hangs during AWS tests.

Signed-off-by: Alexey Rivkin <[email protected]>
1 parent 32da639 commit 8d3ee0b

File tree

3 files changed

+4
-4
lines changed

3 files changed

+4
-4
lines changed

src/utils/libfabric/libfabric_common.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ getAvailableNetworkDevices() {
5050
hints->mode = FI_CONTEXT;
5151
hints->ep_attr->type = FI_EP_RDM;
5252

53-
int ret = fi_getinfo(FI_VERSION(1, 18), NULL, NULL, 0, hints, &info);
53+
int ret = fi_getinfo(FI_VERSION(1, 21), NULL, NULL, 0, hints, &info);
5454
if (ret) {
5555
NIXL_ERROR << "fi_getinfo failed " << fi_strerror(-ret);
5656
fi_freeinfo(hints);

src/utils/libfabric/libfabric_rail.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ nixlLibfabricRail::nixlLibfabricRail(const std::string &device,
431431
hints->domain_attr->threading = FI_THREAD_SAFE;
432432
try {
433433
// Get fabric info for this specific device - first try with FI_HMEM
434-
int ret = fi_getinfo(FI_VERSION(1, 18), NULL, NULL, 0, hints, &info);
434+
int ret = fi_getinfo(FI_VERSION(1, 21), NULL, NULL, 0, hints, &info);
435435

436436
// If no provider found with FI_HMEM, retry without it
437437
if (ret || !info) {
@@ -442,7 +442,7 @@ nixlLibfabricRail::nixlLibfabricRail(const std::string &device,
442442
hints->caps = FI_MSG | FI_RMA;
443443
hints->caps |= FI_LOCAL_COMM | FI_REMOTE_COMM;
444444

445-
ret = fi_getinfo(FI_VERSION(1, 18), NULL, NULL, 0, hints, &info);
445+
ret = fi_getinfo(FI_VERSION(1, 21), NULL, NULL, 0, hints, &info);
446446
if (ret) {
447447
NIXL_ERROR << "fi_getinfo failed for rail " << rail_id << ": " << fi_strerror(-ret);
448448
throw std::runtime_error("fi_getinfo failed for rail " + std::to_string(rail_id));

src/utils/libfabric/libfabric_topology.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ nixlLibfabricTopology::buildPcieToLibfabricMapping() {
381381
// This ensures consistency between device discovery and PCIe mapping
382382
hints->fabric_attr->prov_name = strdup(provider_name.c_str());
383383

384-
int ret = fi_getinfo(FI_VERSION(1, 18), NULL, NULL, 0, hints, &info);
384+
int ret = fi_getinfo(FI_VERSION(1, 21), NULL, NULL, 0, hints, &info);
385385
if (ret) {
386386
NIXL_ERROR << "fi_getinfo failed for PCIe mapping with provider " << provider_name << ": "
387387
<< fi_strerror(-ret);

0 commit comments

Comments
 (0)