Skip to content

Commit 787d322

Browse files
lfr-0531 and heyuhhh
authored and committed
update block sparse attention kernel.
Signed-off-by: Fanrong Li <[email protected]>
fix params issue
Signed-off-by: yuhangh <[email protected]>
1 parent e53802c commit 787d322

File tree

67 files changed

+199
-197
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+199
-197
lines changed

cpp/tensorrt_llm/common/attentionOp.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -494,11 +494,11 @@ class AttentionOp
494494
mPosShiftEnabled, mPagedContextFMHA, mFP8ContextFMHA, mFP8AttenOutput, mFP8ContextMLA, mFP8GenerationMLA,
495495
mChunkPrefillBufferBatchSize, mDenseContextFMHA, mHasFullAttentionMask, mIsSpecDecodingEnabled,
496496
mUseSpecDecoding, mIsSpecDecTree, mSpecDecodingIsGenerationLengthVariable, mSpecDecodingMaxGenerationLength,
497-
mIsMLAEnabled, mIsGenerationMLA, mUseGenFlashMLA, mUseSparseAttention, mMLAParams.data(), mCpSize, mCpRank,
498-
mCpGroup, mNumAttnHeads, mNumAttnKVHeads, mNumKVHeadsOrigin, mAttnTpSize, mAttnTpRank, mAttnCpSize,
499-
mAttnCpRank, mUlyssesMQABroadcast, mEnableContextFMHA, mFMHAForceFP32Acc, mMultiBlockMode, mEnableXQA,
500-
mUseKVCache, mSkipAttn, mFuseFp4Quant, mRuntimeSparseAttentionParams.data(), mNbMultiBlockSemaphores,
501-
mAttentionChunkSize.value_or(-1));
497+
mIsMLAEnabled, mIsGenerationMLA, mUseGenFlashMLA, mUseSparseAttention, mUseTllmGenSparseAttention,
498+
mMLAParams.data(), mCpSize, mCpRank, mCpGroup, mNumAttnHeads, mNumAttnKVHeads, mNumKVHeadsOrigin,
499+
mAttnTpSize, mAttnTpRank, mAttnCpSize, mAttnCpRank, mUlyssesMQABroadcast, mEnableContextFMHA,
500+
mFMHAForceFP32Acc, mMultiBlockMode, mEnableXQA, mUseKVCache, mSkipAttn, mFuseFp4Quant,
501+
mRuntimeSparseAttentionParams.data(), mNbMultiBlockSemaphores, mAttentionChunkSize.value_or(-1));
502502
};
503503

504504
private:

cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticBlockSparseSwapsAbForGen_cubin.cpp

Lines changed: 0 additions & 3 deletions
This file was deleted.

cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticBlockSparseSwapsAbForGen_cubin.cpp

Lines changed: 0 additions & 3 deletions
This file was deleted.

cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticBlockSparseSwapsAbForGen_cubin.cpp

Lines changed: 0 additions & 3 deletions
This file was deleted.

cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticBlockSparseSwapsAbForGen_cubin.cpp

Lines changed: 0 additions & 3 deletions
This file was deleted.

cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentBlockSparseSwapsAbForGen_cubin.cpp

Lines changed: 0 additions & 3 deletions
This file was deleted.

cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticBlockSparseSwapsAbForGen_cubin.cpp

Lines changed: 0 additions & 3 deletions
This file was deleted.

cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentBlockSparseSwapsAbForGen_cubin.cpp

Lines changed: 0 additions & 3 deletions
This file was deleted.

cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticBlockSparseSwapsAbForGen_cubin.cpp

Lines changed: 0 additions & 3 deletions
This file was deleted.

cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100Kernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticBlockSparseSwapsAbForGen_cubin.cpp

Lines changed: 0 additions & 3 deletions
This file was deleted.

0 commit comments

Comments
 (0)