File tree: 1 file changed (+3 −1 lines changed)
cpp/tensorrt_llm/kernels/trtllmGenKernels/blockScaleMoe: 1 file changed (+3 −1 lines changed)
Original file line number | Diff line number | Diff line change
@@ -647,7 +647,9 @@ void run(Data& data, void* stream)
647647 //
648648 // The upper bound is a strict requirement. The number of blocks should be determined by querying
649649 // the device properties, or conservatively low.
650- static int const numBlocksCoop = tensorrt_llm::common::getMultiProcessorCount ();
650+ static int const smCount = tensorrt_llm::common::getMultiProcessorCount ();
651+ // WAR: Reserve 8 SMs for overlapping kernels.
652+ int const numBlocksCoop = smCount - 8 ;
651653
652654 // Maximum number of tokens supported by the kernel using a cooperative launch.
653655 int const maxTokensCoop = (numBlocksCoop * numThreadsHist * 64 ) / data.mTopK ;
You can’t perform that action at this time.
0 commit comments