diff --git a/mooncake-transfer-engine/src/topology.cpp b/mooncake-transfer-engine/src/topology.cpp index 52f915da1..fc534e84c 100644 --- a/mooncake-transfer-engine/src/topology.cpp +++ b/mooncake-transfer-engine/src/topology.cpp @@ -203,8 +203,11 @@ static std::vector discoverCudaTopology( avail_hca.push_back(hca.name); } } + topology.push_back(TopologyEntry{.name = "cuda:" + std::to_string(i), + .preferred_hca = preferred_hca, + .avail_hca = avail_hca}); topology.push_back( - TopologyEntry{.name = "cuda:" + std::to_string(i), + TopologyEntry{.name = "gpu:" + std::to_string(i), .preferred_hca = std::move(preferred_hca), .avail_hca = std::move(avail_hca)}); } diff --git a/mooncake-transfer-engine/src/transfer_engine.cpp b/mooncake-transfer-engine/src/transfer_engine.cpp index 67874bd93..58013652d 100644 --- a/mooncake-transfer-engine/src/transfer_engine.cpp +++ b/mooncake-transfer-engine/src/transfer_engine.cpp @@ -20,6 +20,7 @@ #include #include +#include "config.h" #include "transfer_metadata_plugin.h" #include "transport/transport.h" @@ -150,8 +151,11 @@ int TransferEngine::init(const std::string &metadata_conn_string, } else { local_topology_->discover(filter_); } - LOG(INFO) << "Topology discovery complete. Found " + LOG(INFO) << "Auto-discovering topology complete: Found " << local_topology_->getHcaList().size() << " HCAs."; + if (globalConfig().trace) { + LOG(INFO) << "Topology:\n" << local_topology_->toString(); + } #ifdef USE_MNNVL if (local_topology_->getHcaList().size() > 0 && @@ -215,6 +219,9 @@ Transport *TransferEngine::installTransport(const std::string &proto, LOG(ERROR) << "Failed to parse NIC priority matrix"; return nullptr; } + if (globalConfig().trace) { + LOG(INFO) << "Install transport " << proto << " with custom topology:\n" << local_topology_->toString(); + } } transport = multi_transports_->installTransport(proto, local_topology_);