Skip to content

Commit 35ffd5f

Browse files
author
wangzaijun
committed
fix: check the more specific "ppl_int8kv_flashdecoding_diverse" mode before "ppl_int8kv_flashdecoding"
1 parent e0bb430 commit 35ffd5f

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

lightllm/models/llama/layer_infer/transformer_layer_infer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,15 +110,15 @@ def _bind_attention(self):
110110
self._context_attention_kernel = partial(
111111
LlamaTransformerLayerInfer._context_attention_kernel_ppl_int8kv, self
112112
)
113-
elif "ppl_int8kv_flashdecoding" in self.mode:
113+
elif "ppl_int8kv_flashdecoding_diverse" in self.mode:
114114
self._token_attention_kernel = partial(
115115
LlamaTransformerLayerInfer._token_decode_attention_ppl_int8kv_flashdecoding_diverse, self
116116
)
117117
self._copy_kv_to_mem_cache = partial(LlamaTransformerLayerInfer._copy_kv_to_mem_cache_ppl_int8kv, self)
118118
self._context_attention_kernel = partial(
119119
LlamaTransformerLayerInfer._context_attention_kernel_ppl_int8kv, self
120120
)
121-
elif "ppl_int8kv_flashdecoding_diverse" in self.mode:
121+
elif "ppl_int8kv_flashdecoding" in self.mode:
122122
self._token_attention_kernel = partial(
123123
LlamaTransformerLayerInfer._token_decode_attention_ppl_int8kv_flashdecoding, self
124124
)

0 commit comments

Comments (0)