
Commit ad594c7

Merge pull request #547 from foundation-model-stack/v2.8.2-rc1
chore(release): merge set of changes for v2.8.2
2 parents 4fa54e1 + 4bd099a commit ad594c7

File tree

2 files changed: +15 -21 lines

README.md
tuning/sft_trainer.py


README.md

Lines changed: 9 additions & 6 deletions
@@ -909,6 +909,8 @@ For information on supported dataset formats and how to tune a vision-language m
 
 ### Supported vision model
 
+Note that vision models are supported starting with `fms-hf-tuning` v2.8.1 or later.
+
 - Legend:
 
 ✅ Ready and available
@@ -921,12 +923,13 @@ For information on supported dataset formats and how to tune a vision-language m
 
 Model Name & Size | Model Architecture | LoRA Tuning | Full Finetuning |
 -------------------- | ---------------- | --------------- | --------------- |
-Llama 3.2-11B Vision | MllamaForConditionalGeneration | ✅* | ✅* |
-Llava 1.5-7B | LlavaForConditionalGeneration | ✅* | ✅* |
-Granite 3.1-2B Vision | LlavaNextForConditionalGeneration | ✅* | ✅* |
-Llava Mistral 1.6-7B | LlavaNextForConditionalGeneration | ✅* | ✅* |
-
-(*) - Supported with `fms-hf-tuning` v2.8.0 or later.
+Llama 3.2-11B Vision | MllamaForConditionalGeneration | ✅ | ✅ |
+Llama 3.2-90B Vision | MllamaForConditionalGeneration | ✔️ | ✔️ |
+Granite 3.2-2B Vision | LlavaNextForConditionalGeneration | ✅ | ✅ |
+Llava Mistral 1.6-7B | LlavaNextForConditionalGeneration | ✅ | ✅ |
+Llava 1.6-34B | LlavaNextForConditionalGeneration | ✔️ | ✔️ |
+Llava 1.5-7B | LlavaForConditionalGeneration | ✅ | ✅ |
+Llava 1.5-13B | LlavaForConditionalGeneration | ✔️ | ✔️ |
 
 **Note**: vLLM currently does not support inference with LoRA-tuned vision models. To use a tuned LoRA adapter of vision model, please merge it with the base model before running vLLM inference.
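As a side note on the vLLM caveat above (illustration only, not part of this commit): a LoRA adapter produced by a vision-model tuning run can be folded into its base model with PEFT's `merge_and_unload` before the merged checkpoint is handed to vLLM. The base checkpoint name and the adapter/output paths below are placeholders.

# Sketch only: fold a tuned LoRA adapter into its base vision model so vLLM can
# serve the merged weights directly. Checkpoint name and paths are placeholders.
from peft import PeftModel
from transformers import AutoProcessor, LlavaForConditionalGeneration

BASE_MODEL = "llava-hf/llava-1.5-7b-hf"   # placeholder base checkpoint
ADAPTER_DIR = "/path/to/lora_adapter"     # placeholder: output of the LoRA tuning run
MERGED_DIR = "/path/to/merged_model"      # placeholder: directory vLLM will load

# Load the base model and attach the tuned LoRA adapter.
base = LlavaForConditionalGeneration.from_pretrained(BASE_MODEL, torch_dtype="auto")
model = PeftModel.from_pretrained(base, ADAPTER_DIR)

# Merge the LoRA weights into the base weights and write a plain checkpoint.
merged = model.merge_and_unload()
merged.save_pretrained(MERGED_DIR)

# Keep the processor (tokenizer + image processor) next to the merged weights.
AutoProcessor.from_pretrained(BASE_MODEL).save_pretrained(MERGED_DIR)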
tuning/sft_trainer.py

Lines changed: 6 additions & 15 deletions
@@ -167,26 +167,17 @@ def train(
             "`--padding_free` argument was called with `packing=True`, "
             "Trainer should not perform packing when using `--padding_free`"
         )
-
+    if fast_moe_config is not None and fast_moe_config.fast_moe is None:
+        fast_moe_config = None
     if fast_moe_config is not None:
-        # Checking for unsupported modules with Scatter MoE for LoRA
-        # Only raise an error for `all-linear`
-        restricted_modules = ["all-linear"]
+        # If LoRA with ScatterMoE detected, raise warning
+        accepted_layers = ["all-linear"]
         if (
             peft_config is not None
             and hasattr(peft_config, "target_modules")
-            and any(
-                module in (peft_config.target_modules or [])
-                for module in restricted_modules
-            )
+            and fast_moe_config.fast_moe is not None
+            and peft_config.target_modules != accepted_layers
         ):
-            raise ValueError(
-                "`--fast_moe` with LoRA does not currently support `all-linear`, as "
-                "target modules at this time. Please explicitly specify target "
-                "modules when using `--fast_moe` with LoRA."
-            )
-        # If other common non-linear modules, raise warning
-        if peft_config is not None and hasattr(peft_config, "target_modules"):
             logger.warning(
                 "You are running lora with the ScatterMoE plugin, please note that "
                 "passing target modules that are part of the moe module can cause unexpected "
