
Commit 33e86f6 (1 parent: 60eaacd)

Address PR review feedback

- Add shebang and NVIDIA copyright headers to gemma3_vl shell scripts
- Fix grammar in scripts/training/README.md
- Refactor run_recipe.py: use direct imports instead of importlib for step functions
- Add error message constants and return type hints
- Fix load_recipe to handle recipes without a peft argument via signature inspection
- Fix qwen3_vl.py: use _dataset_choice consistently for dataset selection logic

File tree

7 files changed (+108, -26 lines)

examples/models/vlm/gemma3_vl/conversion.sh
Lines changed: 15 additions & 0 deletions

@@ -1,3 +1,18 @@
+#!/usr/bin/env bash
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Workspace directory for checkpoints and results
 WORKSPACE=${WORKSPACE:-/workspace}

examples/models/vlm/gemma3_vl/inference.sh
Lines changed: 15 additions & 0 deletions

@@ -1,3 +1,18 @@
+#!/usr/bin/env bash
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Workspace directory for checkpoints and results
 WORKSPACE=${WORKSPACE:-/workspace}

examples/models/vlm/gemma3_vl/peft.sh
Lines changed: 15 additions & 0 deletions

@@ -1,3 +1,18 @@
+#!/usr/bin/env bash
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Workspace directory for checkpoints and results
 WORKSPACE=${WORKSPACE:-/workspace}

examples/models/vlm/gemma3_vl/sft.sh
Lines changed: 15 additions & 0 deletions

@@ -1,3 +1,18 @@
+#!/usr/bin/env bash
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Workspace directory for checkpoints and results
 WORKSPACE=${WORKSPACE:-/workspace}

scripts/training/README.md
Lines changed: 1 addition & 1 deletion

@@ -251,4 +251,4 @@ Generic scripts call recipes with no arguments passed to the recipe function.
 
 All customization happens through CLI overrides after the config is built.
 
-If you need to pass arguments to the recipe constructor itself (e.g., custom parallelism at recipe build time), use model-specific examples, create a custom script.
+If you need to pass arguments to the recipe constructor itself (e.g., custom parallelism at recipe build time), use model-specific examples or create a custom script.
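
For context, a minimal sketch contrasting the two approaches the README distinguishes. The recipe name "qwen3_vl_finetune" and the "lora" value are hypothetical placeholders; only the recipes module lookup and the peft keyword are confirmed by the run_recipe.py changes in this commit.

import megatron.bridge.recipes as recipes

# Generic-script path: build the config with recipe defaults; customize afterwards via CLI overrides.
cfg_default = getattr(recipes, "qwen3_vl_finetune")()

# Custom-script path: pass constructor arguments (e.g., a PEFT scheme) at recipe build time.
cfg_lora = getattr(recipes, "qwen3_vl_finetune")(peft="lora")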

scripts/training/run_recipe.py
Lines changed: 41 additions & 19 deletions

@@ -48,26 +48,37 @@
 """
 
 import argparse
-import importlib
+import inspect
+from typing import Callable
 
 import megatron.bridge.recipes as recipes
 from megatron.bridge.training.config import ConfigContainer
 from megatron.bridge.training.finetune import finetune
+from megatron.bridge.training.gpt_step import forward_step as gpt_forward_step
+from megatron.bridge.training.llava_step import forward_step as llava_forward_step
 from megatron.bridge.training.pretrain import pretrain
 from megatron.bridge.training.utils.omegaconf_utils import process_config_with_overrides
+from megatron.bridge.training.vlm_step import forward_step as vlm_forward_step
 
 
-STEP_MODULES = {
-    "gpt_step": "megatron.bridge.training.gpt_step",
-    "vlm_step": "megatron.bridge.training.vlm_step",
-    "llava_step": "megatron.bridge.training.llava_step",
+STEP_FUNCTIONS: dict[str, Callable] = {
+    "gpt_step": gpt_forward_step,
+    "vlm_step": vlm_forward_step,
+    "llava_step": llava_forward_step,
 }
 
 TRAIN_MODES = {
     "pretrain": pretrain,
     "finetune": finetune,
 }
 
+# Error message constants
+ERR_UNKNOWN_STEP = "Unknown step type: {step_type}. Choose from: {choices}"
+ERR_INFER_MODE_FAILED = (
+    "Unable to infer training mode from recipe name. "
+    "Please include 'pretrain' or 'finetune' in the recipe name or pass --mode explicitly."
+)
+
 
 def parse_args() -> tuple[argparse.Namespace, list[str]]:
     """Parse command-line arguments."""

@@ -92,7 +103,7 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]:
         "--step_func",
         type=str,
         default="gpt_step",
-        choices=sorted(STEP_MODULES.keys()),
+        choices=sorted(STEP_FUNCTIONS.keys()),
         help="Step function: gpt_step (text-only), vlm_step (vision-language), or llava_step (LLaVA models)",
     )
     parser.add_argument(

@@ -127,18 +138,32 @@ def load_recipe(recipe_name: str, peft_scheme: str | None) -> ConfigContainer:
     )
 
     config_builder = getattr(recipes, recipe_name)
-    return config_builder(peft=peft_scheme)
-
 
-def load_forward_step(step_type: str):
+    # Check if the recipe accepts a 'peft' argument
+    try:
+        sig = inspect.signature(config_builder)
+        params = sig.parameters
+        accepts_peft = "peft" in params or any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params.values())
+    except (ValueError, TypeError):
+        # If signature inspection fails, fall back to try/except
+        accepts_peft = True
+
+    if accepts_peft:
+        try:
+            return config_builder(peft=peft_scheme)
+        except TypeError:
+            # Fallback if peft is not accepted despite signature inspection
+            return config_builder()
+    else:
+        return config_builder()
+
+
+def load_forward_step(step_type: str) -> Callable:
     """Load forward_step function based on the requested step type."""
     step_key = step_type.lower()
-    if step_key not in STEP_MODULES:
-        raise ValueError(f"Unknown step type: {step_type}. Choose from: {', '.join(STEP_MODULES)}")
-    module = importlib.import_module(STEP_MODULES[step_key])
-    if not hasattr(module, "forward_step"):
-        raise AttributeError(f"{STEP_MODULES[step_key]} does not define forward_step")
-    return module.forward_step
+    if step_key not in STEP_FUNCTIONS:
+        raise ValueError(ERR_UNKNOWN_STEP.format(step_type=step_type, choices=", ".join(STEP_FUNCTIONS)))
+    return STEP_FUNCTIONS[step_key]
 
 
 def infer_train_mode(recipe_name: str) -> str:

@@ -148,10 +173,7 @@ def infer_train_mode(recipe_name: str) -> str:
     has_finetune = "finetune" in lowered
     if has_pretrain ^ has_finetune:
         return "pretrain" if has_pretrain else "finetune"
-    raise ValueError(
-        "Unable to infer training mode from recipe name. "
-        "Please include 'pretrain' or 'finetune' in the recipe name or pass --mode explicitly."
-    )
+    raise ValueError(ERR_INFER_MODE_FAILED)
 
 
 def main() -> None:
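
As a side note, here is a minimal, standalone sketch of the signature-inspection pattern used in load_recipe above. The build_config helper and the toy pretrain_recipe/finetune_recipe builders are illustrative stand-ins, not part of the repository.

import inspect


def build_config(config_builder, peft_scheme=None):
    """Call a recipe builder, passing `peft` only when the builder can accept it."""
    try:
        params = inspect.signature(config_builder).parameters
        accepts_peft = "peft" in params or any(
            p.kind == inspect.Parameter.VAR_KEYWORD for p in params.values()
        )
    except (ValueError, TypeError):
        # Some callables (e.g., builtins) expose no inspectable signature; assume peft and fall back.
        accepts_peft = True
    if accepts_peft:
        try:
            return config_builder(peft=peft_scheme)
        except TypeError:
            return config_builder()
    return config_builder()


def pretrain_recipe():  # toy builder without a peft argument
    return {"mode": "pretrain"}


def finetune_recipe(peft=None):  # toy builder that accepts peft
    return {"mode": "finetune", "peft": peft}


print(build_config(pretrain_recipe))          # {'mode': 'pretrain'}
print(build_config(finetune_recipe, "lora"))  # {'mode': 'finetune', 'peft': 'lora'}

Checking the signature up front avoids masking unrelated TypeErrors raised inside the recipe itself, which a bare try/except around the call would otherwise swallow.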

src/megatron/bridge/recipes/qwen_vl/qwen3_vl.py
Lines changed: 6 additions & 6 deletions

@@ -378,9 +378,9 @@ def _qwen3_vl_common(
 
     # Determine dataset selection strategy.
     _processor_model = tokenizer_model or hf_path
-    mock = mock or dataset_type == "hf"
+    _dataset_choice = dataset_type or ("mock" if mock else "hf")
 
-    if mock:
+    if _dataset_choice == "mock":
         dataset_cfg: DatasetProvider = MockVLMConversationProvider(
             seq_length=seq_length,
             hf_processor_path=_processor_model,

@@ -393,7 +393,7 @@
             create_attention_mask=True,
             pad_to_max_length=True,
         )
-    elif dataset_type == "preloaded":
+    elif _dataset_choice == "preloaded":
         dataset_cfg = PreloadedVLMConversationProvider(
             seq_length=seq_length,
             hf_processor_path=_processor_model,

@@ -407,7 +407,7 @@
             pin_memory=True,
             persistent_workers=False,
         )
-    elif dataset_type == "hf":
+    elif _dataset_choice == "hf":
         dataset_cfg = HFDatasetConversationProvider(
             seq_length=seq_length,
             hf_processor_path=_processor_model,

@@ -418,7 +418,7 @@
             pin_memory=True,
             persistent_workers=False,
         )
-    elif dataset_type == "energon":
+    elif _dataset_choice == "energon":
         tokenizer = AutoTokenizer.from_pretrained(_processor_model)
         # Use from_pretrained to ensure correct normalization (mean/std) and config (min_pixels)
         # matching Preloaded provider behavior.

@@ -441,7 +441,7 @@
         )
     else:
         raise ValueError(
-            f"Unsupported dataset_type '{dataset_type}'. Expected one of ['mock', 'preloaded', 'hf', 'energon']."
+            f"Unsupported dataset_type '{_dataset_choice}'. Expected one of ['mock', 'preloaded', 'hf', 'energon']."
         )
     # Config Container
     cfg = ConfigContainer(
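
For illustration, a standalone sketch of the dataset-selection fix above. The resolve_dataset_choice helper is hypothetical; it mirrors only the _dataset_choice expression and validation, with the provider construction omitted.

def resolve_dataset_choice(dataset_type, mock):
    """Mirror of `_dataset_choice = dataset_type or ("mock" if mock else "hf")` plus validation."""
    choice = dataset_type or ("mock" if mock else "hf")
    if choice not in ("mock", "preloaded", "hf", "energon"):
        raise ValueError(
            f"Unsupported dataset_type '{choice}'. Expected one of ['mock', 'preloaded', 'hf', 'energon']."
        )
    return choice


assert resolve_dataset_choice(None, mock=True) == "mock"          # mock only when no explicit type is given
assert resolve_dataset_choice(None, mock=False) == "hf"           # default to the HF dataset provider
assert resolve_dataset_choice("energon", mock=True) == "energon"  # an explicit dataset_type wins over mock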
