Skip to content
4 changes: 0 additions & 4 deletions benchmarks/profiler/deploy/profile_sla_moe_dgdr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ spec:
profilingConfig:
profilerImage: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.6.1"
config:
# Engine configuration
engine:
is_moe_model: true # Enable MoE model support (uses TEP/DEP instead of TP)

# Sweep/profiling configuration
sweep:
# Standard online profiling (not using AI Configurator)
Expand Down
11 changes: 8 additions & 3 deletions benchmarks/profiler/profile_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import os

from benchmarks.profiler.utils.defaults import EngineType
from benchmarks.profiler.utils.profile_decode import profile_decode
from benchmarks.profiler.utils.profile_prefill import profile_prefill

Expand Down Expand Up @@ -91,7 +92,11 @@
os.makedirs(args.work_dir, exist_ok=True)
if args.tokenizer_path == "":
args.tokenizer_path = args.model_name
if args.mode == "prefill":

# Convert string mode to EngineType
mode = EngineType(args.mode)

if mode == EngineType.PREFILL:
profile_prefill(
args.work_dir,
args.model_name,
Expand All @@ -101,7 +106,7 @@
args.max_context_length,
args.interpolation_granularity,
)
elif args.mode == "decode":
elif mode == EngineType.DECODE:
assert args.max_kv_tokens > 0, "max_kv_tokens must be provided for decode"
profile_decode(
args.work_dir,
Expand All @@ -115,4 +120,4 @@
args.attention_dp_size,
)
else:
raise ValueError(f"Invalid mode: {args.mode}")
raise ValueError(f"Invalid mode: {mode}")
Loading
Loading