Skip to content

Commit 5ee2ed0

Browse files
committed
fix params issue
Signed-off-by: yuhangh <[email protected]>
1 parent d6d558b commit 5ee2ed0

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

cpp/tensorrt_llm/common/attentionOp.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -494,11 +494,11 @@ class AttentionOp
494494
mPosShiftEnabled, mPagedContextFMHA, mFP8ContextFMHA, mFP8AttenOutput, mFP8ContextMLA, mFP8GenerationMLA,
495495
mChunkPrefillBufferBatchSize, mDenseContextFMHA, mHasFullAttentionMask, mIsSpecDecodingEnabled,
496496
mUseSpecDecoding, mIsSpecDecTree, mSpecDecodingIsGenerationLengthVariable, mSpecDecodingMaxGenerationLength,
497-
mIsMLAEnabled, mIsGenerationMLA, mUseGenFlashMLA, mUseSparseAttention, mMLAParams.data(), mCpSize, mCpRank,
498-
mCpGroup, mNumAttnHeads, mNumAttnKVHeads, mNumKVHeadsOrigin, mAttnTpSize, mAttnTpRank, mAttnCpSize,
499-
mAttnCpRank, mUlyssesMQABroadcast, mEnableContextFMHA, mFMHAForceFP32Acc, mMultiBlockMode, mEnableXQA,
500-
mUseKVCache, mSkipAttn, mFuseFp4Quant, mRuntimeSparseAttentionParams.data(), mNbMultiBlockSemaphores,
501-
mAttentionChunkSize.value_or(-1));
497+
mIsMLAEnabled, mIsGenerationMLA, mUseGenFlashMLA, mUseSparseAttention, mUseTllmGenSparseAttention,
498+
mMLAParams.data(), mCpSize, mCpRank, mCpGroup, mNumAttnHeads, mNumAttnKVHeads, mNumKVHeadsOrigin,
499+
mAttnTpSize, mAttnTpRank, mAttnCpSize, mAttnCpRank, mUlyssesMQABroadcast, mEnableContextFMHA,
500+
mFMHAForceFP32Acc, mMultiBlockMode, mEnableXQA, mUseKVCache, mSkipAttn, mFuseFp4Quant,
501+
mRuntimeSparseAttentionParams.data(), mNbMultiBlockSemaphores, mAttentionChunkSize.value_or(-1));
502502
};
503503

504504
private:

cpp/tensorrt_llm/thop/attentionOp.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,8 @@ void attention(torch::Tensor q, torch::optional<torch::Tensor> k, torch::optiona
684684
op->mUseSpecDecoding = spec_decoding_bool_params[1]; // use_spec_decoding
685685
op->mIsSpecDecTree = spec_decoding_bool_params[2]; // is_spec_dec_tree
686686

687+
op->mUseSparseAttention = false;
688+
op->mUseTllmGenSparseAttention = false;
687689
if ((sparse_kv_indices.has_value() && sparse_kv_indices.value().numel() > 0)
688690
|| (sparse_attn_indices.has_value() && sparse_attn_indices.value().numel() > 0))
689691
{

0 commit comments

Comments (0)