From e711f25d403a2a381846468321d511d2ff958922 Mon Sep 17 00:00:00 2001
From: scdesperate
Date: Wed, 10 Dec 2025 10:57:29 +0800
Subject: [PATCH] [fix](kt-kernel): drop the weights held in Python for loading weights operation in C++

---
 kt-kernel/python/utils/llamafile.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kt-kernel/python/utils/llamafile.py b/kt-kernel/python/utils/llamafile.py
index f39704bc..68dce644 100644
--- a/kt-kernel/python/utils/llamafile.py
+++ b/kt-kernel/python/utils/llamafile.py
@@ -217,3 +217,6 @@ def load_weights(self, physical_to_logical_map_cpu: Optional[torch.Tensor] = Non
         # Load weights
         self.cpu_infer.submit(self.moe.load_weights_task(physical_to_logical_map_cpu.data_ptr()))
         self.cpu_infer.sync()
+
+        # Drop original weights after loading
+        self.weights_to_keep = None
\ No newline at end of file