Address review comments: remove dead code and stage-id launch

lishunyang12 · claude · lishunyang12 · commit 71763850db01 · 2026-02-23T18:30:02.000+08:00
- Remove ARCH_MAPPING, _auto_detect_model_arch, RUNTIME_PARAMS (dead code)
- Remove --stage-id / stage_id_filter (conflicts with #939)
- Remove duplicate test, fix stale comments
- Add global params clarifying comment in end2end.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/examples/offline_inference/qwen3_omni/end2end.py b/examples/offline_inference/qwen3_omni/end2end.py
@@ -295,7 +295,9 @@ def main(args):
     else:
         query_result = query_func()
 
-    # Build kwargs with Tier-2 CLI overrides
+    # Build kwargs with Tier-2 CLI overrides.
+    # Global params (e.g. --gpu-memory-utilization) apply to all stages;
+    # per-stage overrides (--stage-N-*) take precedence when specified.
     omni_kwargs = {
         "stage_configs_path": args.stage_configs_path,
         "log_stats": args.log_stats,
@@ -313,8 +315,6 @@ def main(args):
         omni_kwargs["enforce_eager"] = args.enforce_eager
     if args.trust_remote_code:
         omni_kwargs["trust_remote_code"] = args.trust_remote_code
-    if args.stage_id is not None:
-        omni_kwargs["stage_id"] = args.stage_id
 
     omni_llm = Omni(
         model=model_name,
@@ -533,12 +533,6 @@ def parse_args():
         default=False,
         help="Trust remote code for model loading (Tier-2 override).",
     )
-    parser.add_argument(
-        "--stage-id",
-        type=int,
-        default=None,
-        help="Launch only the specified stage ID for independent stage testing.",
-    )
     parser.add_argument(
         "--video-path",
         "-v",
diff --git a/tests/test_config_factory.py b/tests/test_config_factory.py
@@ -158,16 +158,6 @@ def test_duplicate_stage_ids(self):
         errors = topology.validate_topology()
         assert any("duplicate" in e.lower() for e in errors)
 
-    def test_mutual_dependency_detected_as_missing_entry(self):
-        """Test that mutual dependencies are caught (no entry point)."""
-        stages = [
-            StageConfig(stage_id=0, model_stage="stage_a", input_sources=[1]),
-            StageConfig(stage_id=1, model_stage="stage_b", input_sources=[0]),
-        ]
-        topology = StageTopology(model_type="test", stages=stages)
-        errors = topology.validate_topology()
-        assert any("entry point" in e.lower() for e in errors)
-
     def test_self_reference(self):
         """Test that self-references are detected."""
         stages = [
@@ -315,7 +305,7 @@ def test_cli_override_forwards_engine_registered_args(self):
         stage = StageConfig(stage_id=0, model_stage="thinker", input_sources=[])
         cli_overrides = {
             "gpu_memory_utilization": 0.9,  # Well-known param
-            "custom_engine_flag": True,  # Engine-registered but not in RUNTIME_PARAMS
+            "custom_engine_flag": True,  # Not in _INTERNAL_KEYS, so forwarded
         }
 
         overrides = StageConfigFactory._merge_cli_overrides(stage, cli_overrides)
@@ -340,13 +330,6 @@ def test_cli_override_excludes_internal_keys(self):
         assert "stage_configs_path" not in overrides
         assert "batch_timeout" not in overrides
 
-    def test_arch_mapping(self):
-        """Test that model architecture mapping is correct."""
-        assert StageConfigFactory.ARCH_MAPPING["qwen3_omni_moe"] == "Qwen3OmniMoeForConditionalGeneration"
-        assert StageConfigFactory.ARCH_MAPPING["qwen2_5_omni"] == "Qwen2_5OmniForConditionalGeneration"
-        assert StageConfigFactory.ARCH_MAPPING["bagel"] == "BagelForConditionalGeneration"
-        assert StageConfigFactory.ARCH_MAPPING["qwen3_tts"] == "Qwen3TTSTalkerForConditionalGeneration"
-
     def test_all_topology_files_exist(self):
         """Test that every entry in TOPOLOGY_FILES has an actual YAML file."""
         from vllm_omni.model_executor.stage_topologies import get_topology_path
diff --git a/vllm_omni/config/stage_config.py b/vllm_omni/config/stage_config.py
@@ -223,7 +223,6 @@ class StageConfigFactory:
     This factory is the main entry point for creating stage configurations.
     It handles:
     - Loading internal Tier-1 pipeline topology files
-    - Auto-detecting model architecture
     - Merging CLI overrides (Tier-2) into stage configs
     - Supporting both single-stage and multi-stage models
     """
@@ -238,28 +237,17 @@ class StageConfigFactory:
         "qwen3_tts": "qwen3_tts.yaml",
     }
 
-    # Mapping of model types to architecture classes
-    ARCH_MAPPING: dict[str, str] = {
-        "qwen3_omni_moe": "Qwen3OmniMoeForConditionalGeneration",
-        "qwen2_5_omni": "Qwen2_5OmniForConditionalGeneration",
-        "bagel": "BagelForConditionalGeneration",
-        "qwen3_tts": "Qwen3TTSTalkerForConditionalGeneration",
-    }
-
     @classmethod
     def create_from_model(
         cls,
         model: str,
         cli_overrides: dict[str, Any] | None = None,
-        stage_id_filter: int | None = None,
     ) -> list[StageConfig]:
         """Load internal topology, merge with CLI overrides.
 
         Args:
             model: Model name or path.
             cli_overrides: Tier-2 CLI overrides from VllmConfig/OmniDiffusionConfig.
-            stage_id_filter: If specified, only return the stage with this ID
-                           (for independent stage launch).
 
         Returns:
             List of StageConfig objects with CLI overrides applied.
@@ -279,12 +267,9 @@ def create_from_model(
         if errors:
             logger.warning(f"Topology validation warnings for {model}: {errors}")
 
-        # Apply CLI overrides and filter stages
+        # Apply CLI overrides
         result: list[StageConfig] = []
         for stage in topology.stages:
-            if stage_id_filter is not None and stage.stage_id != stage_id_filter:
-                continue
-
             # Merge global CLI overrides
             stage.runtime_overrides = cls._merge_cli_overrides(stage, cli_overrides)
             result.append(stage)
@@ -440,46 +425,6 @@ def _auto_detect_model_type(cls, model: str) -> str | None:
             logger.debug(f"Failed to auto-detect model type for {model}: {e}")
             return None
 
-    @classmethod
-    def _auto_detect_model_arch(cls, model: str) -> str | None:
-        """Auto-detect model_arch from model directory.
-
-        Args:
-            model: Model name or path.
-
-        Returns:
-            Model architecture class name if detected, None otherwise.
-        """
-        model_type = cls._auto_detect_model_type(model)
-        if model_type is None:
-            return None
-
-        # Check mapping first
-        if model_type in cls.ARCH_MAPPING:
-            return cls.ARCH_MAPPING[model_type]
-
-        # Fallback: generate from model_type
-        # Convert snake_case to PascalCase and add suffix
-        parts = model_type.split("_")
-        pascal_case = "".join(part.capitalize() for part in parts)
-        return f"{pascal_case}ForConditionalGeneration"
-
-    # Well-known Tier-2 runtime parameters.  Any CLI arg whose name
-    # matches one of these keys is forwarded to every stage by default.
-    # Additional engine-registered args are also accepted (see
-    # _merge_cli_overrides), so this set does NOT need to be exhaustive.
-    RUNTIME_PARAMS: set[str] = {
-        "gpu_memory_utilization",
-        "tensor_parallel_size",
-        "devices",
-        "enforce_eager",
-        "max_num_batched_tokens",
-        "trust_remote_code",
-        "max_batch_size",
-        "distributed_executor_backend",
-        "enable_prefix_caching",
-    }
-
     # Keys that should never be forwarded as engine overrides (internal /
     # orchestrator-only knobs, complex objects, etc.).
     _INTERNAL_KEYS: set[str] = {
@@ -506,8 +451,8 @@ def _merge_cli_overrides(
         """Merge CLI overrides into stage runtime config.
 
         All CLI arguments registered by engine config classes (e.g.
-        EngineArgs / OmniDiffusionConfig) are accepted as overrides,
-        not just the well-known ``RUNTIME_PARAMS`` set.
+        EngineArgs / OmniDiffusionConfig) are accepted as overrides
+        unless they appear in ``_INTERNAL_KEYS``.
 
         Handles:
         - Global overrides (apply to all stages)
diff --git a/vllm_omni/entrypoints/cli/serve.py b/vllm_omni/entrypoints/cli/serve.py
@@ -137,16 +137,6 @@ def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgu
             help="The address of the Ray cluster to connect to.",
         )
 
-        # Independent stage launch support
-        omni_config_group.add_argument(
-            "--stage-id",
-            type=int,
-            default=None,
-            help="Launch only the specified stage ID for distributed deployments. "
-            "Use this when deploying stages independently across nodes. "
-            "Example: --stage-id 0 launches only the first stage.",
-        )
-
         # Diffusion model specific arguments
         omni_config_group.add_argument(
             "--num-gpus",
diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py
@@ -203,7 +203,6 @@ def _initialize_stages(self, model: str, kwargs: dict[str, Any]) -> None:
         batch_timeout = kwargs.get("batch_timeout", 10)
         stage_configs_path = kwargs.get("stage_configs_path", None)
         log_stats = kwargs.get("log_stats", False)
-        stage_id = kwargs.get("stage_id", None)  # For independent stage launch
 
         ### base engine args
         tokenizer = kwargs.get("tokenizer", None)
@@ -221,23 +220,6 @@ def _initialize_stages(self, model: str, kwargs: dict[str, Any]) -> None:
             self.config_path = stage_configs_path
             self.stage_configs = load_stage_configs_from_yaml(stage_configs_path, base_engine_args=base_engine_args)
 
-        # Filter stages if --stage-id is specified (for independent launch).
-        # NOTE: In independent launch mode the filtered stage occupies list
-        # index 0 regardless of its original stage_id.  This is intentional
-        # because the stage runs in isolation without cross-stage connectors.
-        if stage_id is not None:
-            filtered_configs = [cfg for cfg in self.stage_configs if getattr(cfg, "stage_id", None) == stage_id]
-            if not filtered_configs:
-                logger.warning(
-                    f"Stage ID {stage_id} not found in configs. Available IDs: "
-                    f"{[getattr(cfg, 'stage_id', None) for cfg in self.stage_configs]}"
-                )
-            else:
-                logger.info(f"Independent launch mode: loading only stage {stage_id}")
-                self.stage_configs = (
-                    create_config(filtered_configs) if isinstance(filtered_configs[0], dict) else filtered_configs
-                )
-
         # Inject diffusion LoRA-related knobs from kwargs if not present in the stage config.
         for cfg in self.stage_configs:
             try:
diff --git a/vllm_omni/entrypoints/utils.py b/vllm_omni/entrypoints/utils.py
@@ -248,7 +248,7 @@ def extract_runtime_overrides(kwargs: dict[str, Any]) -> dict[str, Any]:
     """Extract Tier-2 runtime parameters from kwargs.
 
     All CLI arguments registered by engine config classes are accepted,
-    not just the well-known set in ``StageConfigFactory.RUNTIME_PARAMS``.
+    unless they appear in ``StageConfigFactory._INTERNAL_KEYS``.
     Internal / orchestrator-only keys are excluded automatically.
 
     Args: