2 changes: 1 addition & 1 deletion ATTRIBUTIONS-Python.md
@@ -441,7 +441,7 @@ License: `Apache`
 - `Homepage`: https://github.com/huggingface/accelerate


-## aiconfigurator (0.2.0)
+## aiconfigurator (0.4.0)

 ### Licenses
 License: `Apache-2.0`
2 changes: 0 additions & 2 deletions benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
@@ -19,8 +19,6 @@ spec:
   # AI Configurator mode (fast simulation-based profiling)
   use_ai_configurator: true
   aic_system: h200_sxm
-  aic_model_name: QWEN3_32B
-  aic_backend_version: "0.20.0"

   # SLA targets for profiling
   sla:
10 changes: 5 additions & 5 deletions benchmarks/profiler/profile_sla.py
@@ -149,9 +149,9 @@ async def run_profile(args):
             raise ValueError(
                 "Must provide --aic-system when using --use-ai-configurator."
             )
-        if not args.aic_model_name:
+        if not args.aic_hf_id:
             raise ValueError(
-                "Must provide --aic-model-name when using --use-ai-configurator."
+                "Must provide --aic-hf-id when using --use-ai-configurator."
             )
         if not args.aic_backend_version:
             raise ValueError(
@@ -160,15 +160,15 @@

         logger.info("Will use aiconfigurator to estimate perf.")
         ai_configurator_perf_estimator = AIConfiguratorPerfEstimator(
-            args.aic_model_name,
+            args.aic_hf_id,
             args.aic_system.lower(),
             args.aic_backend,
             args.aic_backend_version,
         )
     else:
-        if args.aic_system or args.aic_model_name or args.aic_backend_version:
+        if args.aic_system or args.aic_hf_id or args.aic_backend_version:
             logger.warning(
-                "Will ignore --aic-system, --aic-model-name, and/or --backend-version "
+                "Will ignore --aic-system, --aic-hf-id, and/or --backend-version "
                 "when not using --use-ai-configurator."
            )
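
The two hunks above rename both the validation guard and the first positional argument passed to AIConfiguratorPerfEstimator. A minimal standalone sketch of the resulting argument contract; the helper name validate_aic_args is hypothetical, and only the attribute and flag names are taken from the diff:

def validate_aic_args(args):
    # Fail fast when AI Configurator mode is enabled without a required input.
    required = {
        "aic_system": "--aic-system",
        "aic_hf_id": "--aic-hf-id",
        "aic_backend_version": "--aic-backend-version",
    }
    for attr, flag in required.items():
        if not getattr(args, attr):
            raise ValueError(f"Must provide {flag} when using --use-ai-configurator.")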
8 changes: 4 additions & 4 deletions benchmarks/profiler/utils/profiler_argparse.py
@@ -82,7 +82,7 @@ def create_profiler_parser() -> argparse.Namespace:
         decode_interpolation_granularity: Int (how many samples to benchmark to interpolate ITL under different active kv cache size and decode context length, default: 6)
         use_ai_configurator: Boolean (use ai-configurator to estimate benchmarking results instead of running actual deployment, default: False)
         aic_system: String (target system for use with aiconfigurator, default: None)
-        aic_model_name: String (aiconfigurator name of the target model, default: None)
+        aic_hf_id: String (aiconfigurator name of the target model, default: None)
         aic_backend: String (aiconfigurator backend of the target model, if not provided, will use args.backend, default: "")
         aic_backend_version: String (specify backend version when using aiconfigurator to estimate perf, default: None)
         dry_run: Boolean (dry run the profile job, default: False)
@@ -281,10 +281,10 @@
         help="Target system for use with aiconfigurator (e.g. h100_sxm, h200_sxm)",
     )
     parser.add_argument(
-        "--aic-model-name",
+        "--aic-hf-id",
         type=str,
-        default=config.get("sweep", {}).get("aic_model_name"),
-        help="aiconfigurator name of the target model (e.g. QWEN3_32B, DEEPSEEK_V3)",
+        default=config.get("sweep", {}).get("aic_hf_id"),
+        help="aiconfigurator name of the target model (e.g. Qwen/Qwen3-32B, meta-llama/Llama-3.1-405B)",
     )
     parser.add_argument(
         "--aic-backend",
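
For reference, argparse maps the renamed flag --aic-hf-id to the aic_hf_id attribute used throughout this PR. An illustrative standalone parser, not the profiler's full argument set (the real default comes from the sweep config, as shown above):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--aic-hf-id",
    type=str,
    default=None,  # the profiler defaults to config.get("sweep", {}).get("aic_hf_id")
    help="aiconfigurator name of the target model (e.g. Qwen/Qwen3-32B)",
)
args = parser.parse_args(["--aic-hf-id", "Qwen/Qwen3-32B"])
assert args.aic_hf_id == "Qwen/Qwen3-32B"  # dashes become underscores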
2 changes: 1 addition & 1 deletion benchmarks/pyproject.toml
@@ -40,7 +40,7 @@ classifiers = [
 ]

 dependencies = [
-    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@11b6d821f1fbb34300bb0ed4945f647e89fb411a",
+    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759",
     "networkx",
     "pandas",
     "pydantic>=2",
2 changes: 1 addition & 1 deletion (file name not shown)

@@ -54,7 +54,7 @@ spec:
   # AI Configurator mode (fast simulation-based profiling, 20-30 seconds)
   use_ai_configurator: false  # Set to false for online profiling (2-4 hours)
   aic_system: h200_sxm  # Target GPU system for AI Configurator
-  aic_model_name: QWEN3_0.6B  # Model name for AI Configurator
+  aic_hf_id: Qwen/Qwen3-0.6B  # Model name for AI Configurator
   aic_backend_version: "0.20.0"  # Backend version for AI Configurator

   # SLA targets for profiling
4 changes: 2 additions & 2 deletions (file name not shown)

@@ -350,7 +350,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
 			"sweep": map[string]interface{}{
 				"use_ai_configurator": true,
 				"aic_system":          "h200_sxm",
-				"aic_model_name":      "QWEN3_32B",
+				"aic_hf_id":           "Qwen/Qwen3-32B",
 				"aic_backend_version": "0.20.0",
 			},
 		}),
@@ -1060,7 +1060,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
 			"sweep": map[string]interface{}{
 				"use_ai_configurator": true,
 				"aic_system":          "h200_sxm",
-				"aic_model_name":      "QWEN3_32B",
+				"aic_hf_id":           "Qwen/Qwen3-32B",
 				"aic_backend_version": "0.20.0",
 			},
 		}),
7 changes: 1 addition & 6 deletions docs/benchmarks/sla_driven_profiling.md
@@ -299,17 +299,12 @@ profilingConfig:
   sweep:
     use_ai_configurator: true
     aic_system: h200_sxm  # GPU system: h100_sxm, h200_sxm, b200_sxm, gb200_sxm, a100_sxm
-    aic_model_name: QWEN3_32B  # AIC model identifier (see supported list)
+    aic_hf_id: Qwen/Qwen3-32B  # AIC model identifier (see supported list)
     aic_backend_version: "0.20.0"  # TensorRT-LLM version: 0.20.0, 1.0.0rc3, 1.0.0rc6
 ```

 **Supported configurations:** See [AI Configurator documentation](https://github.com/ai-dynamo/aiconfigurator#supported-features)

-**Model name mapping examples:**
-- `Qwen/Qwen3-32B` → `QWEN3_32B`
-- `meta-llama/Llama-3.1-70B` → `LLAMA3.1_70B`
-- `deepseek-ai/DeepSeek-V3` → `DEEPSEEK_V3`
-
 ### Planner Configuration (Optional)

 Pass arguments to the SLA planner:
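
The deleted mapping list is the point of the rename: callers now pass the HuggingFace ID straight through instead of translating it to an enum-style aiconfigurator name. A before/after sketch of the sweep block, written as Python dicts purely for illustration (the real configuration is the YAML above):

# Before this PR: enum-style aiconfigurator model name.
old_sweep = {
    "use_ai_configurator": True,
    "aic_system": "h200_sxm",
    "aic_model_name": "QWEN3_32B",
    "aic_backend_version": "0.20.0",
}

# After this PR: the HuggingFace ID is used directly.
new_sweep = {
    "use_ai_configurator": True,
    "aic_system": "h200_sxm",
    "aic_hf_id": "Qwen/Qwen3-32B",
    "aic_backend_version": "0.20.0",
}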
2 changes: 1 addition & 1 deletion docs/planner/sla_planner_quickstart.md
@@ -230,7 +230,7 @@ sweep:
 sweep:
   use_ai_configurator: true
   aic_system: h200_sxm
-  aic_model_name: QWEN3_32B
+  aic_hf_id: Qwen/Qwen3-32B
   aic_backend_version: "0.20.0"

 ```
10 changes: 5 additions & 5 deletions tests/profiler/test_profile_sla_aiconfigurator.py
@@ -49,7 +49,7 @@ def __init__(self):
         self.dry_run = False
         self.use_ai_configurator = True
         self.aic_system = "h200_sxm"
-        self.aic_model_name = "QWEN3_32B"
+        self.aic_hf_id = "Qwen/Qwen3-32B"
         self.aic_backend = ""
         self.aic_backend_version = "0.20.0"
         self.num_gpus_per_node = 8
@@ -60,7 +60,7 @@
 @pytest.mark.pre_merge
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
-    "missing_arg", ["aic_system", "aic_model_name", "aic_backend_version"]
+    "missing_arg", ["aic_system", "aic_hf_id", "aic_backend_version"]
 )
 async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg):
     # Check that validation error happens when a required arg is missing.
@@ -99,12 +99,12 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
         ("trtllm", "1.0.0rc3"),
     ],
 )
-@pytest.mark.parametrize("model_name", ["QWEN3_32B", "LLAMA3.1_405B"])
+@pytest.mark.parametrize("hf_model_id", ["Qwen/Qwen3-32B", "meta-llama/Llama-3.1-405B"])
 async def test_trtllm_aiconfigurator_many(
-    self, trtllm_args, model_name, backend, aic_backend_version
+    self, trtllm_args, hf_model_id, backend, aic_backend_version
 ):
     # Test that profile_sla works with a variety of backend versions and model names.
-    trtllm_args.aic_model_name = model_name
+    trtllm_args.aic_hf_id = hf_model_id
     trtllm_args.backend = backend
     trtllm_args.aic_backend_version = aic_backend_version
     await run_profile(trtllm_args)
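
The stacked parametrize decorators above run a cross product: each (backend, version) pair is exercised against each HuggingFace ID. A self-contained sketch of the same shape; the ("trtllm", "0.20.0") pair is an assumption, since only "1.0.0rc3" is visible in this hunk:

import pytest

@pytest.mark.parametrize(
    "backend, aic_backend_version",
    [("trtllm", "0.20.0"), ("trtllm", "1.0.0rc3")],  # first pair assumed
)
@pytest.mark.parametrize(
    "hf_model_id", ["Qwen/Qwen3-32B", "meta-llama/Llama-3.1-405B"]
)
def test_matrix_shape(backend, aic_backend_version, hf_model_id):
    # 2 (backend, version) pairs x 2 model IDs = 4 generated test cases.
    assert "/" in hf_model_id  # HF IDs are org/name paths, unlike enum-style names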
14 changes: 7 additions & 7 deletions tests/profiler/test_profile_sla_dryrun.py
@@ -67,7 +67,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8
@@ -103,7 +103,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8
@@ -153,7 +153,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8
@@ -196,7 +196,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8
@@ -262,7 +262,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8  # Will be overridden by auto-generation
@@ -328,7 +328,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8  # Will be overridden by auto-generation
@@ -394,7 +394,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8  # Will be overridden by auto-generation
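
Since the same renamed attribute recurs in all seven dry-run fixtures above, a shared helper would shrink any future rename to a one-line change; a hypothetical sketch, not part of this PR:

def apply_aic_defaults(args):
    # Defaults for fixtures that do not exercise AI Configurator mode.
    args.use_ai_configurator = False
    args.aic_system = None
    args.aic_hf_id = None
    args.aic_backend = ""
    args.aic_backend_version = None
    return args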