Reduce experts cache when resizing (#4138)

RunningLeon · web-flow · commit 6abe33a76ef6 · 2025-11-20T14:18:43.000+08:00
diff --git a/lmdeploy/pytorch/messages.py b/lmdeploy/pytorch/messages.py
@@ -445,6 +445,8 @@ def resize(self, size: int):
         """Set size."""
         assert size <= self._num_real
         self._num_real = size
+        if self._expert_ids is not None:
+            self._expert_ids = self._expert_ids[:size].copy()
 
     def append(self, expert_ids: np.ndarray):
         """Append token ids."""