vllm-project
diff --git a/docs/configuration/stage_configs.md b/docs/configuration/stage_configs.md
Lines changed: 6 additions & 1 deletion
diff --git a/examples/offline_inference/qwen3_omni/end2end.py b/examples/offline_inference/qwen3_omni/end2end.py
Lines changed: 10 additions & 10 deletions
diff --git a/tests/test_config_factory.py b/tests/test_config_factory.py
Lines changed: 60 additions & 60 deletions
diff --git a/vllm_omni/config/__init__.py b/vllm_omni/config/__init__.py
Lines changed: 2 additions & 2 deletions
@@ -3,7 +3,12 @@
 In vLLM-Omni, the target model is separated into multiple stages, which are processed by different LLMEngines, DiffusionEngines, or other types of engines. Depending on its type, such as an Autoregressive (AR) stage or a Diffusion Transformer (DiT) stage, each stage can choose the corresponding schedulers and model workers to load with its engine in a plug-in fashion.
 
 !!! note
-    Default stage config YAMLs (for example, `vllm_omni/model_executor/stage_configs/qwen2_5_omni.yaml` and `vllm_omni/model_executor/stage_configs/qwen3_omni_moe.yaml`) are bundled and loaded automatically when `stage_configs_path` is not provided. They have been verified to work on 1xH100 for Qwen2.5-Omni and 2xH100 for Qwen3-Omni.
+    Default stage config YAMLs are now organized under `vllm_omni/model_pipelines/<model_type>/`. Each model type has:
+
+    - `pipeline.yaml`: Defines the pipeline structure (stages, types, data-flow connections). Set by model developers at integration time.
+    - `default_args.yaml`: Default runtime/engine args and sampling parameters. Can be overridden via CLI flags.
+
+    These configs are loaded automatically when `stage_configs_path` is not provided. They have been verified to work on 1xH100 for Qwen2.5-Omni and 2xH100 for Qwen3-Omni.
 
 Therefore, as a core part of vLLM-Omni, the stage configs for a model have several main functions:
 
 
@@ -21,7 +21,7 @@
 from vllm.multimodal.image import convert_image_mode
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 
-# Import StageConfigFactory for Tier-2 CLI override testing
+# Import Omni for end-to-end inference
 from vllm_omni.entrypoints.omni import Omni
 
 SEED = 42
@@ -295,7 +295,7 @@ def main(args):
     else:
         query_result = query_func()
 
-    # Build kwargs with Tier-2 CLI overrides.
+    # Build kwargs with CLI overrides.
     # Global params (e.g. --gpu-memory-utilization) apply to all stages;
     # per-stage overrides (--stage-N-*) take precedence when specified.
     omni_kwargs = {
@@ -304,7 +304,7 @@ def main(args):
         "stage_init_timeout": args.stage_init_timeout,
     }
 
-    # Add Tier-2 CLI overrides if specified
+    # Add CLI overrides if specified
     if args.gpu_memory_utilization is not None:
         omni_kwargs["gpu_memory_utilization"] = args.gpu_memory_utilization
     if args.tensor_parallel_size is not None:
@@ -500,38 +500,38 @@ def parse_args():
         "--stage-configs-path",
         type=str,
         default=None,
-        help="Path to a stage configs file. If not specified, uses auto-detected Tier-1 topology.",
+        help="Path to a stage configs file. If not specified, uses auto-detected model pipeline config.",
     )
-    # Tier-2 CLI override arguments
+    # CLI override arguments
     parser.add_argument(
         "--gpu-memory-utilization",
         type=float,
         default=None,
-        help="GPU memory utilization for all stages (Tier-2 override). Example: 0.9",
+        help="GPU memory utilization for all stages (CLI override). Example: 0.9",
     )
     parser.add_argument(
         "--tensor-parallel-size",
         type=int,
         default=None,
-        help="Tensor parallel size for all stages (Tier-2 override). Example: 2",
+        help="Tensor parallel size for all stages (CLI override). Example: 2",
     )
     parser.add_argument(
         "--devices",
         type=str,
         default=None,
-        help="Device assignment for stages (Tier-2 override). Example: '0,1'",
+        help="Device assignment for stages (CLI override). Example: '0,1'",
     )
     parser.add_argument(
         "--enforce-eager",
         action="store_true",
         default=False,
-        help="Enforce eager mode for all stages (Tier-2 override).",
+        help="Enforce eager mode for all stages (CLI override).",
     )
     parser.add_argument(
         "--trust-remote-code",
         action="store_true",
         default=False,
-        help="Trust remote code for model loading (Tier-2 override).",
+        help="Trust remote code for model loading (CLI override).",
     )
     parser.add_argument(
         "--video-path",
 
@@ -7,9 +7,9 @@
 import pytest
 
 from vllm_omni.config.stage_config import (
+    ModelPipeline,
     StageConfig,
     StageConfigFactory,
-    StageTopology,
     StageType,
 )
 
@@ -103,8 +103,8 @@ def test_to_omegaconf_with_runtime_overrides(self):
         assert omega_config.runtime.max_batch_size == 64
 
 
-class TestStageTopology:
-    """Tests for StageTopology class."""
+class TestModelPipeline:
+    """Tests for ModelPipeline class."""
 
     def test_valid_linear_dag(self):
         """Test validation of a valid linear DAG."""
@@ -113,8 +113,8 @@ def test_valid_linear_dag(self):
             StageConfig(stage_id=1, model_stage="talker", input_sources=[0]),
             StageConfig(stage_id=2, model_stage="code2wav", input_sources=[1]),
         ]
-        topology = StageTopology(model_type="test", stages=stages)
-        errors = topology.validate_topology()
+        pipeline = ModelPipeline(model_type="test", stages=stages)
+        errors = pipeline.validate_pipeline()
         assert errors == [], f"Unexpected errors: {errors}"
 
     def test_valid_branching_dag(self):
@@ -124,8 +124,8 @@ def test_valid_branching_dag(self):
             StageConfig(stage_id=1, model_stage="branch_a", input_sources=[0]),
             StageConfig(stage_id=2, model_stage="branch_b", input_sources=[0]),
         ]
-        topology = StageTopology(model_type="test", stages=stages)
-        errors = topology.validate_topology()
+        pipeline = ModelPipeline(model_type="test", stages=stages)
+        errors = pipeline.validate_pipeline()
         assert errors == [], f"Unexpected errors: {errors}"
 
     def test_missing_entry_point(self):
@@ -134,8 +134,8 @@ def test_missing_entry_point(self):
             StageConfig(stage_id=0, model_stage="stage_a", input_sources=[1]),
             StageConfig(stage_id=1, model_stage="stage_b", input_sources=[0]),
         ]
-        topology = StageTopology(model_type="test", stages=stages)
-        errors = topology.validate_topology()
+        pipeline = ModelPipeline(model_type="test", stages=stages)
+        errors = pipeline.validate_pipeline()
         assert any("entry point" in e.lower() for e in errors)
 
     def test_missing_dependency(self):
@@ -144,8 +144,8 @@ def test_missing_dependency(self):
             StageConfig(stage_id=0, model_stage="input", input_sources=[]),
             StageConfig(stage_id=1, model_stage="output", input_sources=[99]),  # Invalid
         ]
-        topology = StageTopology(model_type="test", stages=stages)
-        errors = topology.validate_topology()
+        pipeline = ModelPipeline(model_type="test", stages=stages)
+        errors = pipeline.validate_pipeline()
         assert any("non-existent" in e.lower() for e in errors)
 
     def test_duplicate_stage_ids(self):
@@ -154,8 +154,8 @@ def test_duplicate_stage_ids(self):
             StageConfig(stage_id=0, model_stage="stage_a", input_sources=[]),
             StageConfig(stage_id=0, model_stage="stage_b", input_sources=[]),  # Duplicate
         ]
-        topology = StageTopology(model_type="test", stages=stages)
-        errors = topology.validate_topology()
+        pipeline = ModelPipeline(model_type="test", stages=stages)
+        errors = pipeline.validate_pipeline()
         assert any("duplicate" in e.lower() for e in errors)
 
     def test_self_reference(self):
@@ -164,8 +164,8 @@ def test_self_reference(self):
             StageConfig(stage_id=0, model_stage="entry", input_sources=[]),
             StageConfig(stage_id=1, model_stage="self_ref", input_sources=[1]),  # Self
         ]
-        topology = StageTopology(model_type="test", stages=stages)
-        errors = topology.validate_topology()
+        pipeline = ModelPipeline(model_type="test", stages=stages)
+        errors = pipeline.validate_pipeline()
         assert any("itself" in e.lower() for e in errors)
 
     def test_get_stage_by_id(self):
@@ -174,19 +174,19 @@ def test_get_stage_by_id(self):
             StageConfig(stage_id=0, model_stage="thinker", input_sources=[]),
             StageConfig(stage_id=1, model_stage="talker", input_sources=[0]),
         ]
-        topology = StageTopology(model_type="test", stages=stages)
+        pipeline = ModelPipeline(model_type="test", stages=stages)
 
-        stage = topology.get_stage(1)
+        stage = pipeline.get_stage(1)
         assert stage is not None
         assert stage.model_stage == "talker"
 
-        missing = topology.get_stage(99)
+        missing = pipeline.get_stage(99)
         assert missing is None
 
-    def test_empty_topology(self):
-        """Test validation of empty topology."""
-        topology = StageTopology(model_type="test", stages=[])
-        errors = topology.validate_topology()
+    def test_empty_pipeline(self):
+        """Test validation of empty pipeline."""
+        pipeline = ModelPipeline(model_type="test", stages=[])
+        errors = pipeline.validate_pipeline()
         assert any("no stages" in e.lower() for e in errors)
 
 
@@ -281,43 +281,43 @@ def test_per_stage_override_excludes_internal_keys(self):
         assert "model" not in overrides
         assert "batch_timeout" not in overrides
 
-    def test_all_topology_files_exist(self):
-        """Test that every entry in TOPOLOGY_FILES has an actual YAML file."""
-        from vllm_omni.model_executor.stage_topologies import get_topology_path
+    def test_all_pipeline_files_exist(self):
+        """Test that every entry in PIPELINE_DIRS has an actual pipeline.yaml file."""
+        from vllm_omni.model_pipelines import get_pipeline_path
 
-        for model_type, filename in StageConfigFactory.TOPOLOGY_FILES.items():
-            path = get_topology_path(filename)
-            assert path.exists(), f"Missing topology file for {model_type}: {path}"
+        for model_type, dir_name in StageConfigFactory.PIPELINE_DIRS.items():
+            path = get_pipeline_path(dir_name, "pipeline.yaml")
+            assert path.exists(), f"Missing pipeline file for {model_type}: {path}"
 
-    @pytest.mark.parametrize("model_type", list(StageConfigFactory.TOPOLOGY_FILES.keys()))
-    def test_parse_real_topology_files(self, model_type):
-        """Test that each shipped topology YAML parses and validates correctly."""
-        from vllm_omni.model_executor.stage_topologies import get_topology_path
+    @pytest.mark.parametrize("model_type", list(StageConfigFactory.PIPELINE_DIRS.keys()))
+    def test_parse_real_pipeline_files(self, model_type):
+        """Test that each shipped pipeline YAML parses and validates correctly."""
+        from vllm_omni.model_pipelines import get_pipeline_path
 
-        filename = StageConfigFactory.TOPOLOGY_FILES[model_type]
-        path = get_topology_path(filename)
-        topology = StageConfigFactory._parse_topology_yaml(path, model_type)
+        dir_name = StageConfigFactory.PIPELINE_DIRS[model_type]
+        path = get_pipeline_path(dir_name, "pipeline.yaml")
+        pipeline = StageConfigFactory._parse_pipeline_yaml(path, model_type)
 
         # Basic structure
-        assert topology.model_type == model_type
-        assert len(topology.stages) >= 1
+        assert pipeline.model_type == model_type
+        assert len(pipeline.stages) >= 1
 
         # Must pass validation
-        errors = topology.validate_topology()
+        errors = pipeline.validate_pipeline()
         assert errors == [], f"{model_type}: {errors}"
 
         # Every stage must have required fields
-        for stage in topology.stages:
+        for stage in pipeline.stages:
             assert isinstance(stage.stage_id, int)
             assert isinstance(stage.model_stage, str)
             assert isinstance(stage.stage_type, StageType)
 
 
-class TestTopologyYamlParsing:
-    """Tests for stage topology YAML file parsing (@ZJY0516)."""
+class TestPipelineYamlParsing:
+    """Tests for model pipeline YAML file parsing (@ZJY0516)."""
 
     def test_parse_qwen3_omni_moe_yaml(self, tmp_path):
-        """Test parsing the qwen3_omni_moe topology YAML."""
+        """Test parsing the qwen3_omni_moe pipeline YAML."""
         yaml_content = """\
 model_type: qwen3_omni_moe
 
@@ -356,13 +356,13 @@ def test_parse_qwen3_omni_moe_yaml(self, tmp_path):
         yaml_file = tmp_path / "qwen3_omni_moe.yaml"
         yaml_file.write_text(yaml_content)
 
-        topology = StageConfigFactory._parse_topology_yaml(yaml_file, "qwen3_omni_moe")
+        pipeline = StageConfigFactory._parse_pipeline_yaml(yaml_file, "qwen3_omni_moe")
 
-        assert topology.model_type == "qwen3_omni_moe"
-        assert len(topology.stages) == 3
+        assert pipeline.model_type == "qwen3_omni_moe"
+        assert len(pipeline.stages) == 3
 
         # Stage 0: thinker
-        s0 = topology.stages[0]
+        s0 = pipeline.stages[0]
         assert s0.stage_id == 0
         assert s0.model_stage == "thinker"
         assert s0.stage_type == StageType.LLM
@@ -373,7 +373,7 @@ def test_parse_qwen3_omni_moe_yaml(self, tmp_path):
         assert s0.is_comprehension is True
 
         # Stage 1: talker
-        s1 = topology.stages[1]
+        s1 = pipeline.stages[1]
         assert s1.stage_id == 1
         assert s1.input_sources == [0]
         assert s1.custom_process_input_func == (
@@ -382,7 +382,7 @@ def test_parse_qwen3_omni_moe_yaml(self, tmp_path):
         assert s1.final_output is False
 
         # Stage 2: code2wav
-        s2 = topology.stages[2]
+        s2 = pipeline.stages[2]
         assert s2.stage_id == 2
         assert s2.input_sources == [1]
         assert s2.worker_type == "generation"
@@ -405,11 +405,11 @@ def test_parse_yaml_with_legacy_engine_input_source(self, tmp_path):
         yaml_file = tmp_path / "legacy.yaml"
         yaml_file.write_text(yaml_content)
 
-        topology = StageConfigFactory._parse_topology_yaml(yaml_file, "legacy_model")
-        assert topology.stages[1].input_sources == [0]
+        pipeline = StageConfigFactory._parse_pipeline_yaml(yaml_file, "legacy_model")
+        assert pipeline.stages[1].input_sources == [0]
 
     def test_parse_yaml_with_connectors_and_edges(self, tmp_path):
-        """Test parsing topology with optional connectors and edges."""
+        """Test parsing pipeline with optional connectors and edges."""
         yaml_content = """\
 model_type: test_model
 
@@ -429,12 +429,12 @@ def test_parse_yaml_with_connectors_and_edges(self, tmp_path):
         yaml_file = tmp_path / "with_connectors.yaml"
         yaml_file.write_text(yaml_content)
 
-        topology = StageConfigFactory._parse_topology_yaml(yaml_file, "test_model")
-        assert topology.connectors == {"type": "ray"}
-        assert topology.edges == [{"from": 0, "to": 1}]
+        pipeline = StageConfigFactory._parse_pipeline_yaml(yaml_file, "test_model")
+        assert pipeline.connectors == {"type": "ray"}
+        assert pipeline.edges == [{"from": 0, "to": 1}]
 
-    def test_parsed_topology_passes_validation(self, tmp_path):
-        """Test that a well-formed YAML produces a valid topology."""
+    def test_parsed_pipeline_passes_validation(self, tmp_path):
+        """Test that a well-formed YAML produces a valid pipeline."""
         yaml_content = """\
 model_type: valid_model
 
@@ -453,8 +453,8 @@ def test_parsed_topology_passes_validation(self, tmp_path):
         yaml_file = tmp_path / "valid.yaml"
         yaml_file.write_text(yaml_content)
 
-        topology = StageConfigFactory._parse_topology_yaml(yaml_file, "valid_model")
-        errors = topology.validate_topology()
+        pipeline = StageConfigFactory._parse_pipeline_yaml(yaml_file, "valid_model")
+        errors = pipeline.validate_pipeline()
         assert errors == [], f"Unexpected validation errors: {errors}"
 
     def test_parse_diffusion_stage_type(self, tmp_path):
@@ -473,5 +473,5 @@ def test_parse_diffusion_stage_type(self, tmp_path):
         yaml_file = tmp_path / "diffusion.yaml"
         yaml_file.write_text(yaml_content)
 
-        topology = StageConfigFactory._parse_topology_yaml(yaml_file, "diff_model")
-        assert topology.stages[0].stage_type == StageType.DIFFUSION
+        pipeline = StageConfigFactory._parse_pipeline_yaml(yaml_file, "diff_model")
+        assert pipeline.stages[0].stage_type == StageType.DIFFUSION
@@ -5,9 +5,9 @@
 from vllm_omni.config.lora import LoRAConfig
 from vllm_omni.config.model import OmniModelConfig
 from vllm_omni.config.stage_config import (
+    ModelPipeline,
     StageConfig,
     StageConfigFactory,
-    StageTopology,
     StageType,
 )
 from vllm_omni.config.yaml_util import (
@@ -20,9 +20,9 @@
 __all__ = [
     "OmniModelConfig",
     "LoRAConfig",
+    "ModelPipeline",
     "StageConfig",
     "StageConfigFactory",
-    "StageTopology",
     "StageType",
     "create_config",
     "load_yaml_raw",