Skip to content

Commit fd9369a

Browse files
committed
[Config Refactor][1/N] Two-Tier Stage Configuration System
Signed-off-by: lishunyang <lishunyang12@163.com>
Signed-off-by: Jensen <czjourney@163.com>
Signed-off-by: Jiangyun Zhu <zhu.jiangyun@foxmail.com>
Signed-off-by: Canlin Guo <canlinguosdu@gmail.com>
Signed-off-by: wuhang <wuhang6@huawei.com>
Signed-off-by: Hongsheng Liu <liuhongsheng4@huawei.com>
Signed-off-by: Chenguang Zheng <fake0fan@users.noreply.github.com>
1 parent a3f2d4c commit fd9369a

File tree

14 files changed

+1307
-52
lines changed

14 files changed

+1307
-52
lines changed

examples/offline_inference/qwen3_omni/end2end.py

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from vllm.multimodal.image import convert_image_mode
2222
from vllm.utils.argparse_utils import FlexibleArgumentParser
2323

24+
# Import StageConfigFactory for Tier-2 CLI override testing
2425
from vllm_omni.entrypoints.omni import Omni
2526

2627
SEED = 42
@@ -294,11 +295,30 @@ def main(args):
294295
else:
295296
query_result = query_func()
296297

298+
# Build kwargs with Tier-2 CLI overrides.
299+
# Global params (e.g. --gpu-memory-utilization) apply to all stages;
300+
# per-stage overrides (--stage-N-*) take precedence when specified.
301+
omni_kwargs = {
302+
"stage_configs_path": args.stage_configs_path,
303+
"log_stats": args.log_stats,
304+
"stage_init_timeout": args.stage_init_timeout,
305+
}
306+
307+
# Add Tier-2 CLI overrides if specified
308+
if args.gpu_memory_utilization is not None:
309+
omni_kwargs["gpu_memory_utilization"] = args.gpu_memory_utilization
310+
if args.tensor_parallel_size is not None:
311+
omni_kwargs["tensor_parallel_size"] = args.tensor_parallel_size
312+
if args.devices is not None:
313+
omni_kwargs["devices"] = args.devices
314+
if args.enforce_eager:
315+
omni_kwargs["enforce_eager"] = args.enforce_eager
316+
if args.trust_remote_code:
317+
omni_kwargs["trust_remote_code"] = args.trust_remote_code
318+
297319
omni_llm = Omni(
298320
model=model_name,
299-
stage_configs_path=args.stage_configs_path,
300-
log_stats=args.log_stats,
301-
stage_init_timeout=args.stage_init_timeout,
321+
**omni_kwargs,
302322
)
303323

304324
thinker_sampling_params = SamplingParams(
@@ -458,6 +478,12 @@ def parse_args():
458478
default="output_audio",
459479
help="[Deprecated] Output wav directory (use --output-dir).",
460480
)
481+
parser.add_argument(
482+
"--output-dir",
483+
type=str,
484+
default=None,
485+
help="Output directory for generated files (text and audio).",
486+
)
461487
parser.add_argument(
462488
"--num-prompts",
463489
type=int,
@@ -474,7 +500,38 @@ def parse_args():
474500
"--stage-configs-path",
475501
type=str,
476502
default=None,
477-
help="Path to a stage configs file.",
503+
help="Path to a stage configs file. If not specified, uses auto-detected Tier-1 topology.",
504+
)
505+
# Tier-2 CLI override arguments
506+
parser.add_argument(
507+
"--gpu-memory-utilization",
508+
type=float,
509+
default=None,
510+
help="GPU memory utilization for all stages (Tier-2 override). Example: 0.9",
511+
)
512+
parser.add_argument(
513+
"--tensor-parallel-size",
514+
type=int,
515+
default=None,
516+
help="Tensor parallel size for all stages (Tier-2 override). Example: 2",
517+
)
518+
parser.add_argument(
519+
"--devices",
520+
type=str,
521+
default=None,
522+
help="Device assignment for stages (Tier-2 override). Example: '0,1'",
523+
)
524+
parser.add_argument(
525+
"--enforce-eager",
526+
action="store_true",
527+
default=False,
528+
help="Enforce eager mode for all stages (Tier-2 override).",
529+
)
530+
parser.add_argument(
531+
"--trust-remote-code",
532+
action="store_true",
533+
default=False,
534+
help="Trust remote code for model loading (Tier-2 override).",
478535
)
479536
parser.add_argument(
480537
"--video-path",

0 commit comments

Comments (0)