Skip to content

Commit a9c9545

Browse files
authored
Honor user-provided threshold for direct AllGather (AG) (#3056)
1 parent 267d122 commit a9c9545

File tree

1 file changed

+16
-8
lines changed

1 file changed

+16
-8
lines changed

projects/rccl/src/rccl_wrap.cc

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -454,16 +454,24 @@ bool rcclUseAllGatherDirect(struct ncclComm* comm, size_t& msgSize) {
454454
return false;
455455
}
456456

457+
// Check if user explicitly set threshold
458+
static int userThresholdInput = -2;
459+
if (userThresholdInput == -2) {
460+
const char *thresholdStr = getenv("RCCL_DIRECT_ALLGATHER_THRESHOLD");
461+
userThresholdInput = !thresholdStr ? 0 : 1;
462+
}
463+
457464
size_t threshold = rcclParamDirectAllGatherThreshold();
458465

459-
if (IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx950") && threshold != -1) {
460-
if (comm->nNodes == 1) {
461-
threshold = 8388608;
462-
} else if (comm->nNodes < 64) {
463-
threshold = comm->nNodes * 2097152;
464-
}
465-
} else if (IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx942") && threshold != -1) {
466-
threshold = 4194304;
466+
// Only perform auto-selection if user didn't explicitly set the threshold and threshold is not -1
467+
if (!userThresholdInput && IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx950") && threshold != -1) {
468+
if (comm->nNodes == 1) {
469+
threshold = 8388608;
470+
} else if (comm->nNodes < 64) {
471+
threshold = comm->nNodes * 2097152;
472+
}
473+
} else if (!userThresholdInput && IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx942") && threshold != -1) {
474+
threshold = 4194304;
467475
}
468476

469477
comm->enableCustColl = IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx950") || IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx942");

0 commit comments

Comments
 (0)