File tree Expand file tree Collapse file tree 2 files changed +8
-3
lines changed
tensorrt_llm/_torch/custom_ops
tests/unittest/api_stability/references Expand file tree Collapse file tree 2 files changed +8
-3
lines changed Original file line number Diff line number Diff line change @@ -327,15 +327,12 @@ def get_dense_gemm_approximate_cta_nums(
327327 STATE_SIZE as DISTRIBUTED_TOPK_STATE_SIZE
328328 from ..cute_dsl_kernels .blackwell .top_k .single_pass_multi_cta_radix_topk import \
329329 SinglePassMultiCTARadixTopKKernel
330- <<<<<<< HEAD
331330 from ..cute_dsl_kernels .blackwell .top_k .single_pass_multi_cta_radix_topk_cluster import \
332331 STATE_SIZE as CLUSTER_TOPK_STATE_SIZE
333332 from ..cute_dsl_kernels .blackwell .top_k .single_pass_multi_cta_radix_topk_cluster import (
334333 SinglePassMultiCTARadixTopKClusterKernel , _query_max_cluster_size )
335334 from ..cute_dsl_kernels .blackwell .dense_gemm_persistent import \
336335 PersistentDenseGemmKernel
337- =======
338- >>>>>>> f6e66826d ([TRTLLM-11289][fix] Fix pre-commit formatting for CuTe DSL BF16 GEMM/BMM code)
339336 from ..cute_dsl_kernels .blackwell .utils import make_ptr
340337
341338 class CuteDSLNVFP4BlackwellRunner (TunableRunner ):
Original file line number Diff line number Diff line change @@ -251,6 +251,14 @@ methods:
251251 annotation : bool
252252 default : False
253253 status : prototype
254+ use_cute_dsl_bf16_bmm :
255+ annotation : bool
256+ default : False
257+ status : prototype
258+ use_cute_dsl_bf16_gemm :
259+ annotation : bool
260+ default : False
261+ status : prototype
254262 return_annotation : None
255263 generate :
256264 parameters :
You can’t perform that action at this time.
0 commit comments