Skip to content

Commit 8839b04

Browse files
committed
Fix qpIndex selection in ncclIbIrecv for AINIC mode in net_ib_rocm
In AINIC mode, comm->base.qpIndex is intentionally not updated inside the ncclIbIrecv recv-posting loop; the update is deferred to ncclIbPostFifo so that CTS messages are sent on the correct QPs. However, the loop body still used comm->base.qpIndex for QP selection, so every iteration posted its receive on the same QP instead of distributing the receives across all physical NICs in the merged device. This commit introduces curQpIndex, which reads from the local qpIndex variable (which does advance on each iteration) in AINIC mode, and from comm->base.qpIndex in the standard path. As a result, round-robin QP selection works correctly in both AINIC and non-AINIC configurations.
1 parent c0f9dd1 commit 8839b04

File tree

2 files changed

+10
-8
lines changed

2 files changed

+10
-8
lines changed

projects/rccl/ext-src/rocm_netib.patch

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -665,7 +665,7 @@ index 9bfd8dcf..4d3f0a08 100644
665665
struct ncclIbRequest* req;
666666
NCCLCHECK(ncclIbGetRequest(&comm->base, &req));
667667
req->type = NCCL_NET_IB_REQ_RECV;
668-
@@ -2586,50 +2800,64 @@ ncclResult_t ncclIbIrecv(void* recvComm, int n, void** data, size_t* sizes, int*
668+
@@ -2586,50 +2800,65 @@ ncclResult_t ncclIbIrecv(void* recvComm, int n, void** data, size_t* sizes, int*
669669
req->devBases[i] = &comm->devs[i].base;
670670
}
671671

@@ -697,7 +697,8 @@ index 9bfd8dcf..4d3f0a08 100644
697697
+ struct ibv_recv_wr* bad_wr;
698698
+ int qpIndex = comm->base.qpIndex;
699699
+ for (int i = 0; i < nqps; i++) {
700-
+ struct ncclIbQp* qp = comm->base.qps + comm->base.qpIndex;
700+
+ int curQpIndex = rcclAinicRoce ? qpIndex : comm->base.qpIndex;
701+
+ struct ncclIbQp* qp = comm->base.qps + curQpIndex;
701702
+ ncclIbAddEvent(req, qp->devIndex, &comm->devs[qp->devIndex].base);
702703
#ifdef NCCL_ENABLE_NET_PROFILING
703704
- // Start a QP event for every request in the multirecv and every qp
@@ -718,7 +719,7 @@ index 9bfd8dcf..4d3f0a08 100644
718719
+ for (int r = 0; r < n; r++) {
719720
+ int nEventHandles = req->pInfo[r].nEventHandles;
720721
+ assert(nEventHandles < MAX_QPS_PER_REQ);
721-
+ req->pInfo[r].qpIndex[nEventHandles] = comm->base.qpIndex;
722+
+ req->pInfo[r].qpIndex[nEventHandles] = curQpIndex;
722723
+ // Store info for profiler
723724
+ int64_t pluginId = NCCL_PROFILER_NET_TYPE_IB | NCCL_PROFILER_NET_IB_VER;
724725
+ req->pInfo[r].data.type = ncclProfileQp;
@@ -762,7 +763,7 @@ index 9bfd8dcf..4d3f0a08 100644
762763
}
763764

764765
ncclResult_t ncclIbIflush(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request) {
765-
@@ -2698,6 +2926,8 @@ static int getReqQpIndex(struct ncclIbRequest* req, int request, int qpNumber) {
766+
@@ -2698,6 +2927,8 @@ static int getReqQpIndex(struct ncclIbRequest* req, int request, int qpNumber) {
766767
}
767768
#endif
768769

@@ -771,7 +772,7 @@ index 9bfd8dcf..4d3f0a08 100644
771772
ncclResult_t ncclIbTest(void* request, int* done, int* sizes) {
772773
struct ncclIbRequest *r = (struct ncclIbRequest*)request;
773774
*done = 0;
774-
@@ -2731,13 +2961,18 @@ ncclResult_t ncclIbTest(void* request, int* done, int* sizes) {
775+
@@ -2731,13 +2962,18 @@ ncclResult_t ncclIbTest(void* request, int* done, int* sizes) {
775776

776777
int totalWrDone = 0;
777778
int wrDone = 0;
@@ -792,7 +793,7 @@ index 9bfd8dcf..4d3f0a08 100644
792793
totalWrDone += wrDone;
793794
if (wrDone == 0) { TIME_CANCEL(3); } else { TIME_STOP(3); }
794795
if (wrDone == 0) continue;
795-
@@ -2889,7 +3124,7 @@ ncclResult_t rcclNetP2pPolicy(void* handle, int isP2p) {
796+
@@ -2889,7 +3125,7 @@ ncclResult_t rcclNetP2pPolicy(void* handle, int isP2p) {
796797
}
797798

798799
ncclNet_t ncclNetIb = {

projects/rccl/src/transport/net_ib_rocm.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2819,14 +2819,15 @@ ncclResult_t rocmIbIrecv(void* recvComm, int n, void** data, size_t* sizes, int*
28192819
struct ibv_recv_wr* bad_wr;
28202820
int qpIndex = comm->base.qpIndex;
28212821
for (int i = 0; i < nqps; i++) {
2822-
struct ncclIbQp* qp = comm->base.qps + comm->base.qpIndex;
2822+
int curQpIndex = rcclAinicRoce ? qpIndex : comm->base.qpIndex;
2823+
struct ncclIbQp* qp = comm->base.qps + curQpIndex;
28232824
ncclIbAddEvent(req, qp->devIndex, &comm->devs[qp->devIndex].base);
28242825
#ifdef NCCL_ENABLE_NET_PROFILING
28252826
// Start a QP event for every request in the multirecv and every qp
28262827
for (int r = 0; r < n; r++) {
28272828
int nEventHandles = req->pInfo[r].nEventHandles;
28282829
assert(nEventHandles < MAX_QPS_PER_REQ);
2829-
req->pInfo[r].qpIndex[nEventHandles] = comm->base.qpIndex;
2830+
req->pInfo[r].qpIndex[nEventHandles] = curQpIndex;
28302831
// Store info for profiler
28312832
int64_t pluginId = NCCL_PROFILER_NET_TYPE_IB | NCCL_PROFILER_NET_IB_VER;
28322833
req->pInfo[r].data.type = ncclProfileQp;

0 commit comments

Comments
 (0)