|
| 1 | +#include "link_manager.h" |
| 2 | +#include "ctx.h" |
| 3 | + |
| 4 | +#include <contrib/libs/ibdrv/include/infiniband/verbs.h> |
| 5 | + |
| 6 | +#include <util/generic/scope.h> |
| 7 | +#include <util/generic/string.h> |
| 8 | +#include <util/stream/output.h> |
| 9 | +#include <util/string/printf.h> |
| 10 | +#include <util/string/builder.h> |
| 11 | + |
| 12 | +#include <errno.h> |
| 13 | + |
| 14 | +#include <memory> |
| 15 | + |
| 16 | +#include <util/network/address.h> |
| 17 | + |
| 18 | +template <> |
| 19 | +struct std::less<ibv_gid> { |
| 20 | + std::size_t operator()(const ibv_gid& a, const ibv_gid& b) const { |
| 21 | + return std::tie(a.global.subnet_prefix, a.global.interface_id) < |
| 22 | + std::tie(b.global.subnet_prefix, b.global.interface_id); |
| 23 | + } |
| 24 | +}; |
| 25 | +template <> |
| 26 | +struct std::equal_to<ibv_gid> { |
| 27 | + bool operator()(const ibv_gid& a, const ibv_gid& b) const { |
| 28 | + return a.global.interface_id == b.global.interface_id |
| 29 | + && a.global.subnet_prefix == b.global.subnet_prefix; |
| 30 | + } |
| 31 | +}; |
| 32 | + |
| 33 | +namespace NInterconnect::NRdma::NLinkMgr { |
| 34 | + |
| 35 | +static class TRdmaLinkManager { |
| 36 | +public: |
| 37 | + const TCtxsMap& GetAllCtxs() { |
| 38 | + return CtxMap; |
| 39 | + } |
| 40 | + |
| 41 | + TRdmaCtx* GetCtx(const ibv_gid& gid) { |
| 42 | + auto it = std::lower_bound( |
| 43 | + CtxMap.begin(), CtxMap.end(), |
| 44 | + std::pair<ibv_gid, std::shared_ptr<NInterconnect::NRdma::TRdmaCtx>>{gid, nullptr}, |
| 45 | + [](const auto& a, const auto& b) { |
| 46 | + return std::less<ibv_gid>()(a.first, b.first); |
| 47 | + } |
| 48 | + ); |
| 49 | + if (it != CtxMap.end() && std::equal_to<ibv_gid>()(it->first, gid)) { |
| 50 | + return it->second.get(); |
| 51 | + } |
| 52 | + Cerr << "No RDMA context found for GID: " << gid << Endl; |
| 53 | + return nullptr; |
| 54 | + } |
| 55 | + |
| 56 | + TRdmaLinkManager() { |
| 57 | + ScanDevices(); |
| 58 | + } |
| 59 | +private: |
| 60 | + TCtxsMap CtxMap; |
| 61 | + |
| 62 | + void ScanDevices() { |
| 63 | + int numDevices = 0; |
| 64 | + int err; |
| 65 | + ibv_device** deviceList = ibv_get_device_list(&numDevices); |
| 66 | + if (!deviceList) { |
| 67 | + ErrNo = errno; |
| 68 | + Err = TString(strerror(errno)); |
| 69 | + return; |
| 70 | + } |
| 71 | + |
| 72 | + Y_DEFER{ ibv_free_device_list(deviceList); }; |
| 73 | + |
| 74 | + for (int i = 0; i < numDevices; i++) { |
| 75 | + ibv_device* dev = deviceList[i]; |
| 76 | + ibv_context* ctx = ibv_open_device(dev); |
| 77 | + if (!ctx) { |
| 78 | + Err = Sprintf("Failed to open ib device '%s'", ibv_get_device_name(dev)); |
| 79 | + continue; |
| 80 | + } |
| 81 | + |
| 82 | + ibv_pd* pd = ibv_alloc_pd(ctx); |
| 83 | + if (!pd) { |
| 84 | + ibv_close_device(ctx); |
| 85 | + continue; |
| 86 | + } |
| 87 | + |
| 88 | + auto deviceCtx = std::make_shared<TDeviceCtx>(ctx, pd); |
| 89 | + |
| 90 | + ibv_device_attr devAttrs; |
| 91 | + err = ibv_query_device(ctx, &devAttrs); |
| 92 | + |
| 93 | + if (err < 0) { |
| 94 | + continue; |
| 95 | + } |
| 96 | + |
| 97 | + for (uint8_t portNum = 1; portNum <= devAttrs.phys_port_cnt; portNum++) { |
| 98 | + ibv_port_attr portAttrs; |
| 99 | + err = ibv_query_port(ctx, portNum, &portAttrs); |
| 100 | + if (err == 0) { |
| 101 | + for (int gidIndex = 0; gidIndex < portAttrs.gid_tbl_len; gidIndex++ ) { |
| 102 | + auto ctx = TRdmaCtx::Create(deviceCtx, portNum, gidIndex); |
| 103 | + if (!ctx) { |
| 104 | + continue; |
| 105 | + } |
| 106 | + |
| 107 | + CtxMap.emplace_back(ctx->GetGid(), ctx); |
| 108 | + } |
| 109 | + } |
| 110 | + } |
| 111 | + } |
| 112 | + std::sort(CtxMap.begin(), CtxMap.end(), |
| 113 | + [](const auto& a, const auto& b) { |
| 114 | + return std::less<ibv_gid>()(a.first, b.first); |
| 115 | + }); |
| 116 | + |
| 117 | + // check for duplicates |
| 118 | + for (size_t i = 0; i < CtxMap.size(); ++i) { |
| 119 | + auto ctx = CtxMap[i].second; |
| 120 | + ctx->DeviceIndex = i; |
| 121 | + |
| 122 | + if (i > 0) { |
| 123 | + auto prevCtx = CtxMap[i - 1].second; |
| 124 | + if (std::equal_to<ibv_gid>()(prevCtx->GetGid(), ctx->GetGid())) { |
| 125 | + Cerr << "Duplicate GID found: ctx1=" << prevCtx->ToString() << ", ctx2=" << ctx->ToString() << Endl; |
| 126 | + } |
| 127 | + } |
| 128 | + } |
| 129 | + } |
| 130 | + |
| 131 | + int ErrNo = 0; |
| 132 | + TString Err; |
| 133 | + |
| 134 | +} RdmaLinkManager; |
| 135 | + |
| 136 | +TRdmaCtx* GetCtx(int sockfd) { |
| 137 | + sockaddr_storage addr; |
| 138 | + socklen_t addrLen = sizeof(addr); |
| 139 | + if (getsockname(sockfd, reinterpret_cast<sockaddr*>(&addr), &addrLen) < 0) { |
| 140 | + Cerr << "getsockname failed: " << strerror(errno) << Endl; |
| 141 | + return nullptr; |
| 142 | + } |
| 143 | + sockaddr_in6* addr_in6 = (sockaddr_in6*)&addr; |
| 144 | + char str[INET6_ADDRSTRLEN]; |
| 145 | + inet_ntop(AF_INET6, &addr_in6->sin6_addr, str, INET6_ADDRSTRLEN); |
| 146 | + return GetCtx(addr_in6->sin6_addr); |
| 147 | +} |
| 148 | + |
| 149 | +TRdmaCtx* GetCtx(const in6_addr& ip) { |
| 150 | + const ibv_gid* gid = reinterpret_cast<const ibv_gid*>(&ip); |
| 151 | + return RdmaLinkManager.GetCtx(*gid); |
| 152 | +} |
| 153 | + |
| 154 | +const TCtxsMap& GetAllCtxs() { |
| 155 | + return RdmaLinkManager.GetAllCtxs(); |
| 156 | +} |
| 157 | + |
| 158 | +} |
0 commit comments