Change the load format to pt for Mixtral (#2028)

WoosukKwon · web-flow · commit b9bcdc715808 · 2023-12-11T10:32:17.000-08:00
diff --git a/vllm/config.py b/vllm/config.py
@@ -119,6 +119,16 @@ def _verify_load_format(self) -> None:
             # Force ROCm to load from pt weights if nothing specific is set
             if load_format == "auto":
                 load_format = "pt"
+
+        # FIXME(woosuk): Temporary hack. Remove once safetensors is supported.
+        architectures = getattr(self.hf_config, "architectures", [])
+        if "MixtralForCausalLM" in architectures and load_format != "pt":
+            logger.info(
+                "Currently, only 'pt' format is supported for Mixtral. "
+                "Changing the format to 'pt'. This may re-download the "
+                "weights if you have downloaded the safetensor weights.")
+            load_format = "pt"
+
         self.load_format = load_format
 
     def _verify_tokenizer_mode(self) -> None: