@@ -425,7 +425,7 @@ static struct tuningModel tuning_model_6 {
425425};
426426
427427static struct tuningModel tuning_model_7 {
428- .hwLat = {
428+ .hwLat = {
429429 /* NVLINK */
430430 { /* Tree (LL/LL128/Simple)*/ { 0.8 , 1.4 , 2.5 }, /* Ring (LL/LL128/Simple)*/ { 0.8 , 2.2 , 3.6 }, /* CollNetDirect (Simple)*/ { 0.0 , 0.0 , 0.8 }, /* CollNetChain (Simple)*/ { 0.0 , 0.0 , 1.4 }, /* NVLS */ { 0 , 0 , 0 }, /* NVLS Tree */ { 0 , 0 , 0 }, /* PAT */ { 0 , 0 , 3.6 } },
431431 /* PCI */
@@ -436,34 +436,34 @@ static struct tuningModel tuning_model_7 {
436436
437437 .bwRatio = {
438438 /* 2 nodes */
439- { /* Tree (LL/LL128/Simple)*/ { 0.051 , 0.22 , 0.64 }, /* Ring (LL/LL128/Simple)*/ { 0.74 , 0.34 , 1.00 }, /* CollNetDirect (Simple)*/ { 0.00 , 0.00 , 1.00 }, /* CollNetChain (Simple)*/ { 0.00 , 0.00 , 1.00 }, /* NVLS */ { 0 , 0 , 0 }, /* NVLS Tree */ { 0 , 0 , 0 }, /* PAT */ { 0 , 0 , 0 } },
439+ { /* Tree (LL/LL128/Simple)*/ { 0.62 , 0.62 , 1.61 }, /* Ring (LL/LL128/Simple)*/ { 0.20 , 0.20 , 1.00 }, /* CollNetDirect (Simple)*/ { 0.00 , 0.00 , 1.00 }, /* CollNetChain (Simple)*/ { 0.00 , 0.00 , 1.00 }, /* NVLS */ { 0 , 0 , 0 }, /* NVLS Tree */ { 0 , 0 , 0 }, /* PAT */ { 0 , 0 , 0 } },
440440 /* more than 2 nodes */
441- { /* Tree (LL/LL128/Simple)*/ { 0.051 , 0.22 , 0.64 }, /* Ring (LL/LL128/Simple)*/ { 0.74 , 0.34 , 1.00 }, /* CollNetDirect (Simple)*/ { 0.00 , 0.00 , 1.00 }, /* CollNetChain (Simple)*/ { 0.00 , 0.00 , 1.00 }, /* NVLS */ { 0 , 0 , 0 }, /* NVLS Tree */ { 0 , 0 , 0 }, /* PAT */ { 0 , 0 , 1.00 } },
441+ { /* Tree (LL/LL128/Simple)*/ { 0.62 , 0.62 , 1.61 }, /* Ring (LL/LL128/Simple)*/ { 0.20 , 0.20 , 1.00 }, /* CollNetDirect (Simple)*/ { 0.00 , 0.00 , 1.00 }, /* CollNetChain (Simple)*/ { 0.00 , 0.00 , 1.00 }, /* NVLS */ { 0 , 0 , 0 }, /* NVLS Tree */ { 0 , 0 , 0 }, /* PAT */ { 0 , 0 , 1.00 } },
442442 },
443443
444444 .treeCorrectionFactor = {
445445 { 0.1 , 0.2 , 0.1 , 0.1 , 0.9 , 0.3 , 0.4 , 0.1 , 0.2 , 0.4 , 0.2 , 0.1 , 0.3 , 0.3 , 0.2 , 0.2 , 0.2 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , },
446446 { 0.1 , 0.3 , 1.0 , 0.1 , 0.5 , 1.0 , 0.9 , 1.0 , 1.0 , 1.0 , 0.3 , 0.1 , 0.4 , 0.5 , 0.5 , 0.4 , 0.4 , 0.3 , 0.3 , 0.2 , 0.2 , 0.2 , 0.2 , 0.2 , 0.2 , 0.2 , 0.2 , },
447- { 0.2 , 1.0 , 0.1 , 0.1 , 0.7 , 0.2 , 0.4 , 0.1 , 0.1 , 0.3 , 0.4 , 0.3 , 0.6 , 0.8 , 1.0 , 1.0 , 1.0 , 1.0 , 0.9 , 0.8 , 0.8 , 0.8 , 0.8 , 0.8 , 0.9 , 0.9 , 0.9 , },
447+ { 0.2 , 1.0 , 0.1 , 0.1 , 0.7 , 0.2 , 0.4 , 0.1 , 0.1 , 0.3 , 0.4 , 0.3 , 0.6 , 0.8 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 0.85 , 0.85 , 0.83 , 0.8 , 0.82 , 0.84 , 0.85 , 0.85 },
448448 },
449449
450450 .ringCorrectionFactor = {
451451 { 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.2 , 0.4 , 0.2 , 0.3 , 0.5 , 0.3 , 0.1 , 0.5 , 0.5 , 0.3 , 0.2 , 0.2 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , },
452452 { 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.1 , 0.3 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 0.8 , 0.7 , 0.5 , 0.4 , 0.4 , 0.3 , 0.3 , 0.3 , 0.3 , 0.3 , 0.3 , },
453- { 1.0 , 0.8 , 0.2 , 1.0 , 1.0 , 0.3 , 1.0 , 0.1 , 0.1 , 0.2 , 0.2 , 0.1 , 0.5 , 1.0 , 0.8 , 0.8 , 1.0 , 0.9 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , },
453+ { 1.0 , 0.8 , 0.2 , 1.0 , 1.0 , 0.3 , 1.0 , 0.1 , 0.1 , 0.2 , 0.2 , 0.1 , 0.5 , 1.0 , 0.8 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 ,},
454454 },
455455 // Follow order in RcclTunableColls
456456 .llProtoRanges = {
457457 /* ReduceScatter*/
458- {/* LL (min/max/factor/thread_threshold)*/ {0 , 65536 , 1 , 16 }, /* LL64/128 (min/max/factor/thread_threshold)*/ {65536 , 8388608 , 1 , 64 }},
458+ {/* LL (min/max/factor/thread_threshold)*/ {0 , 32768 , 1 , 16 }, /* LL64/128 (min/max/factor/thread_threshold)*/ {32768 , 32768 , 1 , 64 }},
459459 /* AllGather*/
460- {/* LL (min/max/factor/thread_threshold)*/ {0 , 65536 , 1 , 16 }, /* LL64/128 (min/max/factor/thread_threshold)*/ {65536 , 8388608 , 1 , 64 }},
460+ {/* LL (min/max/factor/thread_threshold)*/ {0 , 32768 , 1 , 16 }, /* LL64/128 (min/max/factor/thread_threshold)*/ {32768 , 32768 , 1 , 64 }},
461461 /* AllReduce*/
462- {/* LL (min/max/factor/thread_threshold)*/ {0 , 1048576 , 1 , 0 },/* LL64/128 (min/max/factor/thread_threshold)*/ {1048576 , 70640910 , 3145728 , 0 }},
462+ {/* LL (min/max/factor/thread_threshold)*/ {0 , 32768 , 1 , 0 },/* LL64/128 (min/max/factor/thread_threshold)*/ {32768 , 32768 , 3145728 , 0 }},
463463 /* Reduce*/
464- {/* LL (min/max/factor/thread_threshold)*/ {0 , 16383 , 1 , 0 },/* LL64/128 (min/max/factor/thread_threshold)*/ {16383 , 16777216 , 1 , 0 }},
464+ {/* LL (min/max/factor/thread_threshold)*/ {0 , 0 , 1 , 0 },/* LL64/128 (min/max/factor/thread_threshold)*/ {16383 , 16383 , 1 , 0 }},
465465 /* Broadcast*/
466- {/* LL (min/max/factor/thread_threshold)*/ {0 , 2048 , 1 , 0 },/* LL64/128 (min/max/factor/thread_threshold)*/ {2048 , 16777216 , 1 , 0 }},
466+ {/* LL (min/max/factor/thread_threshold)*/ {0 , 32768 , 1 , 0 },/* LL64/128 (min/max/factor/thread_threshold)*/ {32768 , 32768 , 1 , 0 }},
467467 },
468468
469469 .channelThresholds = {
0 commit comments