ai-dynamo · tedzhouhk · Nov 5, 2025 · Nov 5, 2025 · Nov 6, 2025 · Nov 6, 2025
@@ -15,10 +15,6 @@ spec:
   profilingConfig:
     profilerImage: "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.6.1"
     config:
-      # Engine configuration
-      engine:
-        is_moe_model: true  # Enable MoE model support (uses TEP/DEP instead of TP)
-
       # Sweep/profiling configuration
       sweep:
         # Standard online profiling (not using AI Configurator)

@@ -5,6 +5,7 @@
 import logging
 import os
 
+from benchmarks.profiler.utils.defaults import EngineType
 from benchmarks.profiler.utils.profile_decode import profile_decode
 from benchmarks.profiler.utils.profile_prefill import profile_prefill
 
@@ -91,7 +92,11 @@
     os.makedirs(args.work_dir, exist_ok=True)
     if args.tokenizer_path == "":
         args.tokenizer_path = args.model_name
-    if args.mode == "prefill":
+
+    # Convert string mode to EngineType
+    mode = EngineType(args.mode)
+
+    if mode == EngineType.PREFILL:
         profile_prefill(
             args.work_dir,
             args.model_name,
@@ -101,7 +106,7 @@
             args.max_context_length,
             args.interpolation_granularity,
         )
-    elif args.mode == "decode":
+    elif mode == EngineType.DECODE:
         assert args.max_kv_tokens > 0, "max_kv_tokens must be provided for decode"
         profile_decode(
             args.work_dir,
@@ -115,4 +120,4 @@
             args.attention_dp_size,
         )
     else:
-        raise ValueError(f"Invalid mode: {args.mode}")
+        raise ValueError(f"Invalid mode: {mode}")