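Clear the KV cache (zero the K_cache/V_cache buffers) before returning the decoded output; fix the "output tensor not found" error message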

ZHEQIUSHUI · ZHEQIUSHUI · commit d94638dbfc9f · 2025-03-31T10:02:40.000+08:00
diff --git a/src/runner/LLM.hpp b/src/runner/LLM.hpp
@@ -609,6 +609,14 @@ class LLM
 
         final_out = tokenizer->Decode(token_ids);
 
+        for (size_t i = 0; i < _attr.axmodel_num; i++)
+        {
+            memset(llama_layers[i].layer.get_input(prefill_grpid, "K_cache").pVirAddr, 0, llama_layers[i].layer.get_input(prefill_grpid, "K_cache").nSize);
+            memset(llama_layers[i].layer.get_input(prefill_grpid, "V_cache").pVirAddr, 0, llama_layers[i].layer.get_input(prefill_grpid, "V_cache").nSize);
+            memset(llama_layers[i].layer.get_input(decode_grpid, "K_cache").pVirAddr, 0, llama_layers[i].layer.get_input(decode_grpid, "K_cache").nSize);
+            memset(llama_layers[i].layer.get_input(decode_grpid, "V_cache").pVirAddr, 0, llama_layers[i].layer.get_input(decode_grpid, "V_cache").nSize);
+        }
+
         return final_out;
     }
 };
diff --git a/src/runner/ax_model_runner/ax_model_runner.hpp b/src/runner/ax_model_runner/ax_model_runner.hpp
@@ -137,7 +137,7 @@ class ax_runner_base
         }
         if (map_group_output_tensors.find(name) == map_group_output_tensors.end())
         {
-            throw std::runtime_error("input tensor not found: " + name);
+            throw std::runtime_error("output tensor not found: " + name);
         }
         return map_group_output_tensors[name][grpid];
     }

Original file line number	Diff line number	Diff line change
`@@ -137,7 +137,7 @@ class ax_runner_base`
`137`	`137`	`}`
`138`	`138`	`if (map_group_output_tensors.find(name) == map_group_output_tensors.end())`
`139`	`139`	`{`
`140`		`- throw std::runtime_error("input tensor not found: " + name);`
	`140`	`+ throw std::runtime_error("output tensor not found: " + name);`
`141`	`141`	`}`
`142`	`142`	`return map_group_output_tensors[name][grpid];`
`143`	`143`	`}`