1 file changed: +16 -8 lines changed
Original file line number | Diff line number | Diff line change
@@ -454,16 +454,24 @@ bool rcclUseAllGatherDirect(struct ncclComm* comm, size_t& msgSize) {
454454 return false ;
455455 }
456456
457+ // Check if user explicitly set threshold
458+ static int userThresholdInput = -2 ;
459+ if (userThresholdInput == -2 ) {
460+ const char *thresholdStr = getenv (" RCCL_DIRECT_ALLGATHER_THRESHOLD" );
461+ userThresholdInput = !thresholdStr ? 0 : 1 ;
462+ }
463+
457464 size_t threshold = rcclParamDirectAllGatherThreshold ();
458465
459- if (IsArchMatch (comm->topo ->nodes [GPU].nodes [0 ].gpu .gcn , " gfx950" ) && threshold != -1 ) {
460- if (comm->nNodes == 1 ) {
461- threshold = 8388608 ;
462- } else if (comm->nNodes < 64 ) {
463- threshold = comm->nNodes * 2097152 ;
464- }
465- } else if (IsArchMatch (comm->topo ->nodes [GPU].nodes [0 ].gpu .gcn , " gfx942" ) && threshold != -1 ) {
466- threshold = 4194304 ;
466+ // Only perform auto-selection if user didn't explicitly set the threshold and threshold is not -1
467+ if (!userThresholdInput && IsArchMatch (comm->topo ->nodes [GPU].nodes [0 ].gpu .gcn , " gfx950" ) && threshold != -1 ) {
468+ if (comm->nNodes == 1 ) {
469+ threshold = 8388608 ;
470+ } else if (comm->nNodes < 64 ) {
471+ threshold = comm->nNodes * 2097152 ;
472+ }
473+ } else if (!userThresholdInput && IsArchMatch (comm->topo ->nodes [GPU].nodes [0 ].gpu .gcn , " gfx942" ) && threshold != -1 ) {
474+ threshold = 4194304 ;
467475 }
468476
469477 comm->enableCustColl = IsArchMatch (comm->topo ->nodes [GPU].nodes [0 ].gpu .gcn , " gfx950" ) || IsArchMatch (comm->topo ->nodes [GPU].nodes [0 ].gpu .gcn , " gfx942" );
You can’t perform that action at this time.
0 commit comments