`lightllm/models/llama/layer_infer`: 1 file changed (+2, −2)

```diff
@@ -110,15 +110,15 @@ def _bind_attention(self):
             self._context_attention_kernel = partial(
                 LlamaTransformerLayerInfer._context_attention_kernel_ppl_int8kv, self
             )
-        elif "ppl_int8kv_flashdecoding" in self.mode:
+        elif "ppl_int8kv_flashdecoding_diverse" in self.mode:
             self._token_attention_kernel = partial(
                 LlamaTransformerLayerInfer._token_decode_attention_ppl_int8kv_flashdecoding_diverse, self
             )
             self._copy_kv_to_mem_cache = partial(LlamaTransformerLayerInfer._copy_kv_to_mem_cache_ppl_int8kv, self)
             self._context_attention_kernel = partial(
                 LlamaTransformerLayerInfer._context_attention_kernel_ppl_int8kv, self
             )
-        elif "ppl_int8kv_flashdecoding_diverse" in self.mode:
+        elif "ppl_int8kv_flashdecoding" in self.mode:
             self._token_attention_kernel = partial(
                 LlamaTransformerLayerInfer._token_decode_attention_ppl_int8kv_flashdecoding, self
             )
```