Skip to content

Commit fe1ecfe

Browse files
committed
address review: rename load_yaml_raw, drop Tier-1/2 terminology, fix OmegaConf import
Signed-off-by: lishunyang <lishunyang12@163.com>
1 parent 5158381 commit fe1ecfe

File tree

10 files changed

+62
-72
lines changed

10 files changed

+62
-72
lines changed

examples/offline_inference/qwen3_omni/end2end.py

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@
2121
from vllm.multimodal.image import convert_image_mode
2222
from vllm.utils.argparse_utils import FlexibleArgumentParser
2323

24-
# Import StageConfigFactory for Tier-2 CLI override testing
2524
from vllm_omni.entrypoints.omni import Omni
2625

2726
SEED = 42
@@ -295,7 +294,7 @@ def main(args):
295294
else:
296295
query_result = query_func()
297296

298-
# Build kwargs with Tier-2 CLI overrides.
297+
# Build kwargs with CLI overrides.
299298
# Global params (e.g. --gpu-memory-utilization) apply to all stages;
300299
# per-stage overrides (--stage-N-*) take precedence when specified.
301300
omni_kwargs = {
@@ -304,7 +303,7 @@ def main(args):
304303
"stage_init_timeout": args.stage_init_timeout,
305304
}
306305

307-
# Add Tier-2 CLI overrides if specified
306+
# Add CLI overrides if specified
308307
if args.gpu_memory_utilization is not None:
309308
omni_kwargs["gpu_memory_utilization"] = args.gpu_memory_utilization
310309
if args.tensor_parallel_size is not None:
@@ -500,38 +499,38 @@ def parse_args():
500499
"--stage-configs-path",
501500
type=str,
502501
default=None,
503-
help="Path to a stage configs file. If not specified, uses auto-detected Tier-1 topology.",
502+
help="Path to a stage configs file. If not specified, uses auto-detected model pipeline config.",
504503
)
505-
# Tier-2 CLI override arguments
504+
# CLI override arguments (applied to all stages by default)
506505
parser.add_argument(
507506
"--gpu-memory-utilization",
508507
type=float,
509508
default=None,
510-
help="GPU memory utilization for all stages (Tier-2 override). Example: 0.9",
509+
help="GPU memory utilization for all stages. Example: 0.9",
511510
)
512511
parser.add_argument(
513512
"--tensor-parallel-size",
514513
type=int,
515514
default=None,
516-
help="Tensor parallel size for all stages (Tier-2 override). Example: 2",
515+
help="Tensor parallel size for all stages. Example: 2",
517516
)
518517
parser.add_argument(
519518
"--devices",
520519
type=str,
521520
default=None,
522-
help="Device assignment for stages (Tier-2 override). Example: '0,1'",
521+
help="Device assignment for stages. Example: '0,1'",
523522
)
524523
parser.add_argument(
525524
"--enforce-eager",
526525
action="store_true",
527526
default=False,
528-
help="Enforce eager mode for all stages (Tier-2 override).",
527+
help="Enforce eager mode for all stages.",
529528
)
530529
parser.add_argument(
531530
"--trust-remote-code",
532531
action="store_true",
533532
default=False,
534-
help="Trust remote code for model loading (Tier-2 override).",
533+
help="Trust remote code for model loading.",
535534
)
536535
parser.add_argument(
537536
"--video-path",

vllm_omni/config/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
)
1313
from vllm_omni.config.yaml_util import (
1414
create_config,
15-
load_yaml_raw,
15+
load_yaml_to_config,
1616
merge_configs,
1717
to_dict,
1818
)
@@ -25,7 +25,7 @@
2525
"StageTopology",
2626
"StageType",
2727
"create_config",
28-
"load_yaml_raw",
28+
"load_yaml_to_config",
2929
"merge_configs",
3030
"to_dict",
3131
]

vllm_omni/config/stage_config.py

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
"""
4-
Two-Tier Stage Configuration System for vLLM-Omni.
4+
Model Pipeline Configuration System for vLLM-Omni.
55
66
Design Principles:
7-
- Tier-1 (Pipeline Topology): INTERNAL ONLY - set by model developers at integration time
8-
- Tier-2 (Runtime Config): User-configurable via CLI args (VllmConfig/OmniDiffusionConfig params)
7+
- Model Pipeline Config: INTERNAL ONLY - set by model developers at integration time.
8+
Defines pipeline structure (stages, types, data-flow).
9+
- Runtime Config: User-configurable via CLI args (VllmConfig/OmniDiffusionConfig params).
910
10-
Users interact only with Tier-2 (CLI). Tier-1 topology is bundled with models.
11+
Users interact only with CLI args. Pipeline config is bundled with models.
1112
"""
1213

1314
from __future__ import annotations
@@ -19,7 +20,7 @@
1920

2021
from vllm.logger import init_logger
2122

22-
from vllm_omni.config.yaml_util import create_config, load_yaml_raw, to_dict
23+
from vllm_omni.config.yaml_util import create_config, load_yaml_to_config, to_dict
2324
from vllm_omni.model_executor.stage_topologies import get_topology_path
2425

2526
logger = init_logger(__name__)
@@ -39,10 +40,10 @@ class StageConfig:
3940
Note: Engine params (gpu_memory_utilization, tp_size, etc.) come from
4041
VllmConfig or OmniDiffusionConfig via CLI, NOT from this class.
4142
42-
This class represents Tier-1 (Internal) configuration that is:
43+
This class represents pipeline configuration that is:
4344
- Set by model developers at integration time
4445
- NOT user-editable
45-
- Defines pipeline topology, worker types, and processing hooks
46+
- Defines pipeline structure, worker types, and processing hooks
4647
4748
Attributes:
4849
stage_id: Unique identifier for this stage in the pipeline.
@@ -69,28 +70,28 @@ class StageConfig:
6970
# Stage type
7071
stage_type: StageType = StageType.LLM
7172

72-
# Pipeline topology (Tier-1 - Internal, set by developer).
73+
# Pipeline topology (internal, set by developer).
7374
# Lists upstream stage IDs this stage receives data from.
7475
# Future: may be derived from StageTopology.edges for richer
7576
# edge metadata (e.g., data format, buffering policy).
7677
input_sources: list[int] = field(default_factory=list)
7778

78-
# Processing hooks (Tier-1 - Internal)
79+
# Processing hooks (internal)
7980
custom_process_input_func: str | None = None
8081

81-
# Output configuration (Tier-1 - Internal)
82+
# Output configuration (internal)
8283
final_output: bool = False
8384
final_output_type: str | None = None # "text", "audio", "image"
8485

85-
# Worker configuration (Tier-1 - Internal)
86+
# Worker configuration (internal)
8687
worker_type: str | None = None # "ar" or "generation"
8788
scheduler_cls: str | None = None
8889
hf_config_name: str | None = None
8990

9091
# Comprehension flag
9192
is_comprehension: bool = False
9293

93-
# Runtime overrides (Tier-2 - populated from CLI, not from topology file)
94+
# Runtime overrides (populated from CLI, not from pipeline config)
9495
runtime_overrides: dict[str, Any] = field(default_factory=dict)
9596

9697
def to_omegaconf(self) -> Any:
@@ -111,7 +112,7 @@ def to_omegaconf(self) -> Any:
111112
if self.hf_config_name:
112113
engine_args["hf_config_name"] = self.hf_config_name
113114

114-
# Apply runtime overrides from Tier-2 (CLI args)
115+
# Apply runtime overrides from CLI args
115116
for key, value in self.runtime_overrides.items():
116117
if key not in ("devices", "max_batch_size"):
117118
engine_args[key] = value
@@ -144,7 +145,7 @@ def to_omegaconf(self) -> Any:
144145

145146
@dataclass
146147
class StageTopology:
147-
"""Internal Tier-1 topology - bundled with model, not user-editable.
148+
"""Model pipeline topology - bundled with model, not user-editable.
148149
149150
This class represents the complete pipeline topology for a multi-stage model.
150151
It is defined by model developers and validated at integration time (not runtime).
@@ -218,12 +219,12 @@ def validate_topology(self) -> list[str]:
218219

219220

220221
class StageConfigFactory:
221-
"""Factory merges Tier-1 pipeline topology with Tier-2 CLI overrides.
222+
"""Factory merges model pipeline config with CLI overrides.
222223
223224
This factory is the main entry point for creating stage configurations.
224225
It handles:
225-
- Loading internal Tier-1 pipeline topology files
226-
- Merging CLI overrides (Tier-2) into stage configs
226+
- Loading model pipeline config files
227+
- Merging CLI overrides into stage configs
227228
- Supporting both single-stage and multi-stage models
228229
"""
229230

@@ -247,7 +248,7 @@ def create_from_model(
247248
248249
Args:
249250
model: Model name or path.
250-
cli_overrides: Tier-2 CLI overrides from VllmConfig/OmniDiffusionConfig.
251+
cli_overrides: CLI overrides from VllmConfig/OmniDiffusionConfig.
251252
252253
Returns:
253254
List of StageConfig objects with CLI overrides applied.
@@ -328,7 +329,7 @@ def create_default_diffusion(cls, kwargs: dict[str, Any]) -> list[dict[str, Any]
328329

329330
@classmethod
330331
def _load_topology(cls, model: str) -> StageTopology | None:
331-
"""Load internal Tier-1 pipeline topology YAML for the model.
332+
"""Load model pipeline config YAML for the model.
332333
333334
Args:
334335
model: Model name or path.
@@ -355,7 +356,7 @@ def _load_topology(cls, model: str) -> StageTopology | None:
355356

356357
@classmethod
357358
def _parse_topology_yaml(cls, path: Path, model_type: str) -> StageTopology:
358-
"""Parse a Tier-1 pipeline topology YAML file.
359+
"""Parse a model pipeline config YAML file.
359360
360361
Args:
361362
path: Path to the YAML file.
@@ -364,7 +365,7 @@ def _parse_topology_yaml(cls, path: Path, model_type: str) -> StageTopology:
364365
Returns:
365366
StageTopology object.
366367
"""
367-
config_data = load_yaml_raw(path)
368+
config_data = load_yaml_to_config(path)
368369

369370
stages: list[StageConfig] = []
370371
for stage_data in config_data.stages:

vllm_omni/config/yaml_util.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,22 @@
1515
from omegaconf import DictConfig, OmegaConf
1616

1717

18-
def load_yaml_raw(path: str | Any) -> DictConfig:
19-
"""Load a YAML file and return the raw DictConfig.
20-
21-
Use this when downstream code needs attribute-style access
22-
(e.g., ``cfg.stage_args``). Prefer ``load_yaml()`` for new code.
18+
def load_yaml_to_config(path: str) -> DictConfig:
19+
"""Load a YAML file and return it as a DictConfig.
2320
2421
Args:
2522
path: Path to the YAML file.
2623
2724
Returns:
2825
OmegaConf DictConfig.
26+
27+
Raises:
28+
TypeError: If the loaded YAML is not a mapping (e.g., a bare list).
2929
"""
30-
return OmegaConf.load(path) # type: ignore[return-value]
30+
cfg = OmegaConf.load(path)
31+
if not isinstance(cfg, DictConfig):
32+
raise TypeError(f"Expected a DictConfig from {path}, but got {type(cfg)}")
33+
return cfg
3134

3235

3336
def create_config(data: Any) -> DictConfig:

vllm_omni/entrypoints/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from vllm.transformers_utils.config import get_config, get_hf_file_to_dict
1010
from vllm.transformers_utils.repo_utils import file_or_path_exists
1111

12-
from vllm_omni.config.yaml_util import create_config, load_yaml_raw, merge_configs
12+
from vllm_omni.config.yaml_util import create_config, load_yaml_to_config, merge_configs
1313
from vllm_omni.entrypoints.stage_utils import _to_dict
1414
from vllm_omni.platforms import current_omni_platform
1515

@@ -275,7 +275,7 @@ def load_stage_configs_from_yaml(config_path: str, base_engine_args: dict | None
275275
"""
276276
if base_engine_args is None:
277277
base_engine_args = {}
278-
config_data = load_yaml_raw(config_path)
278+
config_data = load_yaml_to_config(config_path)
279279
stage_args = config_data.stage_args
280280
global_async_chunk = config_data.get("async_chunk", False)
281281
# Convert any nested dataclass objects to dicts before creating DictConfig
@@ -320,7 +320,7 @@ def load_and_resolve_stage_configs(
320320
if not stage_configs:
321321
if default_stage_cfg_factory is not None:
322322
default_stage_cfg = default_stage_cfg_factory()
323-
stage_configs = OmegaConf.create(default_stage_cfg)
323+
stage_configs = create_config(default_stage_cfg)
324324
else:
325325
stage_configs = []
326326
else:

vllm_omni/model_executor/stage_topologies/__init__.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
"""
4-
Stage topology definitions for vLLM-Omni pipeline models.
4+
Model pipeline config definitions for vLLM-Omni pipeline models.
55
6-
Topology YAML files in this directory define Tier-1 (internal) pipeline
7-
structure: stages, their types, and data-flow connections. Runtime
8-
parameters (GPU memory, tensor-parallel size, etc.) are NOT stored here;
9-
they come from CLI flags (Tier-2).
6+
YAML files in this directory define pipeline structure: stages, their
7+
types, and data-flow connections. Runtime parameters (GPU memory,
8+
tensor-parallel size, etc.) are NOT stored here; they come from CLI flags.
109
"""
1110

1211
from pathlib import Path

vllm_omni/model_executor/stage_topologies/bagel.yaml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
1-
# Tier-1 Stage Topology for Bagel
2-
# This file is set by model developers at integration time.
3-
# Users NEVER edit this - they use CLI for Tier-2 params.
1+
# Model Pipeline Config for Bagel
2+
# Defines pipeline structure (stages, types, data-flow).
3+
# Runtime params (gpu_memory_utilization, tp_size, etc.) come from CLI.
44
#
55
# Stage 0: Thinker (LLM - multimodal understanding + text generation)
66
# Stage 1: DiT (Diffusion - image generation from KV cache)
7-
#
8-
# NOTE: Engine params like gpu_memory_utilization, tensor_parallel_size,
9-
# devices, max_batch_size come from CLI (Tier-2), NOT from this file.
107

118
model_type: bagel
129

vllm_omni/model_executor/stage_topologies/qwen2_5_omni.yaml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
1-
# Tier-1 Stage Topology for Qwen2.5-Omni
2-
# This file is set by model developers at integration time.
3-
# Users NEVER edit this - they use CLI for Tier-2 params.
1+
# Model Pipeline Config for Qwen2.5-Omni
2+
# Defines pipeline structure (stages, types, data-flow).
3+
# Runtime params (gpu_memory_utilization, tp_size, etc.) come from CLI.
44
#
55
# Stage 0: Thinker (multimodal understanding + text generation)
66
# Stage 1: Talker (text embeddings -> audio codec codes)
77
# Stage 2: Code2Wav (codec codes -> audio waveform)
8-
#
9-
# NOTE: Engine params like gpu_memory_utilization, tensor_parallel_size,
10-
# devices, max_batch_size come from CLI (Tier-2), NOT from this file.
118

129
model_type: qwen2_5_omni
1310

vllm_omni/model_executor/stage_topologies/qwen3_omni_moe.yaml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
1-
# Tier-1 Stage Topology for Qwen3-Omni-MoE
2-
# This file is set by model developers at integration time.
3-
# Users NEVER edit this - they use CLI for Tier-2 params.
1+
# Model Pipeline Config for Qwen3-Omni-MoE
2+
# Defines pipeline structure (stages, types, data-flow).
3+
# Runtime params (gpu_memory_utilization, tp_size, etc.) come from CLI.
44
#
55
# Stage 0: Thinker (multimodal understanding + text generation)
66
# Stage 1: Talker (text embeddings -> 8-layer RVQ codec codes)
77
# Stage 2: Code2Wav (8-layer RVQ codes -> audio waveform)
8-
#
9-
# NOTE: Engine params like gpu_memory_utilization, tensor_parallel_size,
10-
# devices, max_batch_size come from CLI (Tier-2), NOT from this file.
118

129
model_type: qwen3_omni_moe
1310

vllm_omni/model_executor/stage_topologies/qwen3_tts.yaml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
1-
# Tier-1 Stage Topology for Qwen3-TTS
2-
# This file is set by model developers at integration time.
3-
# Users NEVER edit this - they use CLI for Tier-2 params.
1+
# Model Pipeline Config for Qwen3-TTS
2+
# Defines pipeline structure (stages, types, data-flow).
3+
# Runtime params (gpu_memory_utilization, tp_size, etc.) come from CLI.
44
#
5-
# Stage 0: Talker (text -> audio codec codes)
5+
# Stage 0: Qwen3-TTS (text -> audio codec codes)
66
# Stage 1: Code2Wav (codec codes -> audio waveform)
7-
#
8-
# NOTE: Engine params like gpu_memory_utilization, tensor_parallel_size,
9-
# devices, max_batch_size come from CLI (Tier-2), NOT from this file.
107

118
model_type: qwen3_tts
129

0 commit comments

Comments
 (0)