Commit 82c55b7

Authored by mmy360
fix: prevent OOM when converting DeepSeek-V3 models by enabling memory-efficient loading (#524)
Co-authored-by: mmy360 <mmy360@foxmail.com>
1 parent 7c2856a commit 82c55b7

File tree

1 file changed: 1 addition, 1 deletion


tools/convert_hf_to_torch_dist.py

Lines changed: 1 addition & 1 deletion
@@ -86,7 +86,7 @@ def main():
     # Load model
     hf_model_path = args.hf_checkpoint
     bridge = AutoBridge.from_pretrained(hf_model_path, trust_remote_code=True)
-    bridge.load_weights(model, hf_model_path)
+    bridge.load_weights(model, hf_model_path, memory_efficient=True)
     print(f"Model loaded: {hf_model_path}")

     save_checkpoint(1, model, None, None, 0)
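The change passes `memory_efficient=True` to `bridge.load_weights`. The sketch below is not the AutoBridge implementation; it is a hypothetical toy (the `ToyCheckpoint` class and its `read`/`release` methods are invented for illustration) showing the general idea such a flag usually toggles: streaming checkpoint tensors one at a time instead of materializing the entire state dict, so peak memory stays at one tensor rather than the whole model.

```python
class ToyCheckpoint:
    """Stands in for an on-disk HF checkpoint; tracks peak live tensors."""

    def __init__(self, tensors):
        self.tensors = tensors  # name -> "tensor" (a plain list here)
        self.live = 0           # tensors currently materialized in memory
        self.peak = 0           # high-water mark of materialized tensors

    def keys(self):
        return self.tensors.keys()

    def read(self, name):
        # Materialize one tensor from "disk".
        self.live += 1
        self.peak = max(self.peak, self.live)
        return list(self.tensors[name])

    def release(self):
        # Free one materialized tensor.
        self.live -= 1


def load_weights(model, ckpt, memory_efficient=False):
    """Copy checkpoint tensors into `model` (a dict standing in for params)."""
    if memory_efficient:
        # Stream: materialize one tensor, copy it in, free it, repeat.
        # Peak extra memory is a single tensor.
        for name in ckpt.keys():
            tensor = ckpt.read(name)
            model[name] = tensor
            ckpt.release()
    else:
        # Materialize the full state dict first. Peak extra memory is the
        # whole checkpoint, which is what can OOM on a model as large as
        # DeepSeek-V3.
        state = {name: ckpt.read(name) for name in ckpt.keys()}
        model.update(state)
        for _ in state:
            ckpt.release()
```

With three tensors, the streaming path peaks at one live tensor while the eager path peaks at three, yet both leave the model identically loaded. The real `load_weights` presumably does something analogous with per-shard or per-tensor reads.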

0 commit comments
