@@ -494,11 +494,11 @@ class AttentionOp
494494 mPosShiftEnabled , mPagedContextFMHA , mFP8ContextFMHA , mFP8AttenOutput , mFP8ContextMLA , mFP8GenerationMLA ,
495495 mChunkPrefillBufferBatchSize , mDenseContextFMHA , mHasFullAttentionMask , mIsSpecDecodingEnabled ,
496496 mUseSpecDecoding , mIsSpecDecTree , mSpecDecodingIsGenerationLengthVariable , mSpecDecodingMaxGenerationLength ,
497- mIsMLAEnabled , mIsGenerationMLA , mUseGenFlashMLA , mUseSparseAttention , mMLAParams . data (), mCpSize , mCpRank ,
498- mCpGroup , mNumAttnHeads , mNumAttnKVHeads , mNumKVHeadsOrigin , mAttnTpSize , mAttnTpRank , mAttnCpSize ,
499- mAttnCpRank , mUlyssesMQABroadcast , mEnableContextFMHA , mFMHAForceFP32Acc , mMultiBlockMode , mEnableXQA ,
500- mUseKVCache , mSkipAttn , mFuseFp4Quant , mRuntimeSparseAttentionParams . data (), mNbMultiBlockSemaphores ,
501- mAttentionChunkSize .value_or (-1 ));
497+ mIsMLAEnabled , mIsGenerationMLA , mUseGenFlashMLA , mUseSparseAttention , mUseTllmGenSparseAttention ,
498+ mMLAParams . data (), mCpSize , mCpRank , mCpGroup , mNumAttnHeads , mNumAttnKVHeads , mNumKVHeadsOrigin ,
499+ mAttnTpSize , mAttnTpRank , mAttnCpSize , mAttnCpRank , mUlyssesMQABroadcast , mEnableContextFMHA ,
500+ mFMHAForceFP32Acc , mMultiBlockMode , mEnableXQA , mUseKVCache , mSkipAttn , mFuseFp4Quant ,
501+ mRuntimeSparseAttentionParams . data (), mNbMultiBlockSemaphores , mAttentionChunkSize .value_or (-1 ));
502502 };
503503
504504private:
0 commit comments