2 changes: 1 addition & 1 deletion ATTRIBUTIONS-Python.md
@@ -441,7 +441,7 @@ License: `Apache`
 - `Homepage`: https://github.com/huggingface/accelerate


-## aiconfigurator (0.2.0)
+## aiconfigurator (0.4.0)

 ### Licenses
 License: `Apache-2.0`
2 changes: 0 additions & 2 deletions benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
@@ -19,8 +19,6 @@ spec:
   # AI Configurator mode (fast simulation-based profiling)
   use_ai_configurator: true
   aic_system: h200_sxm
-  aic_model_name: QWEN3_32B
-  aic_backend_version: "0.20.0"

   # SLA targets for profiling
   sla:
10 changes: 5 additions & 5 deletions benchmarks/profiler/profile_sla.py
@@ -149,9 +149,9 @@ async def run_profile(args):
             raise ValueError(
                 "Must provide --aic-system when using --use-ai-configurator."
             )
-        if not args.aic_model_name:
+        if not args.aic_hf_id:
             raise ValueError(
-                "Must provide --aic-model-name when using --use-ai-configurator."
+                "Must provide --aic-hf-id when using --use-ai-configurator."
             )
         if not args.aic_backend_version:
             raise ValueError(
@@ -160,15 +160,15 @@

         logger.info("Will use aiconfigurator to estimate perf.")
         ai_configurator_perf_estimator = AIConfiguratorPerfEstimator(
-            args.aic_model_name,
+            args.aic_hf_id,
             args.aic_system.lower(),
             args.aic_backend,
             args.aic_backend_version,
         )
     else:
-        if args.aic_system or args.aic_model_name or args.aic_backend_version:
+        if args.aic_system or args.aic_hf_id or args.aic_backend_version:
             logger.warning(
-                "Will ignore --aic-system, --aic-model-name, and/or --backend-version "
+                "Will ignore --aic-system, --aic-hf-id, and/or --backend-version "
                 "when not using --use-ai-configurator."
            )
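
The two hunks above rename both the validation guard and the first positional argument passed to AIConfiguratorPerfEstimator. A minimal standalone sketch of the resulting argument contract; the helper name validate_aic_args is hypothetical, and only the attribute and flag names are taken from the diff:

def validate_aic_args(args):
    # Fail fast when AI Configurator mode is enabled without a required input.
    required = {
        "aic_system": "--aic-system",
        "aic_hf_id": "--aic-hf-id",
        "aic_backend_version": "--aic-backend-version",
    }
    for attr, flag in required.items():
        if not getattr(args, attr):
            raise ValueError(f"Must provide {flag} when using --use-ai-configurator.")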
8 changes: 4 additions & 4 deletions benchmarks/profiler/utils/profiler_argparse.py
@@ -82,7 +82,7 @@ def create_profiler_parser() -> argparse.Namespace:
         decode_interpolation_granularity: Int (how many samples to benchmark to interpolate ITL under different active kv cache size and decode context length, default: 6)
         use_ai_configurator: Boolean (use ai-configurator to estimate benchmarking results instead of running actual deployment, default: False)
         aic_system: String (target system for use with aiconfigurator, default: None)
-        aic_model_name: String (aiconfigurator name of the target model, default: None)
+        aic_hf_id: String (aiconfigurator name of the target model, default: None)
         aic_backend: String (aiconfigurator backend of the target model, if not provided, will use args.backend, default: "")
         aic_backend_version: String (specify backend version when using aiconfigurator to estimate perf, default: None)
         dry_run: Boolean (dry run the profile job, default: False)
@@ -281,10 +281,10 @@
         help="Target system for use with aiconfigurator (e.g. h100_sxm, h200_sxm)",
     )
     parser.add_argument(
-        "--aic-model-name",
+        "--aic-hf-id",
         type=str,
-        default=config.get("sweep", {}).get("aic_model_name"),
-        help="aiconfigurator name of the target model (e.g. QWEN3_32B, DEEPSEEK_V3)",
+        default=config.get("sweep", {}).get("aic_hf_id"),
+        help="aiconfigurator name of the target model (e.g. Qwen/Qwen3-32B, meta-llama/Llama-3.1-405B)",
     )
     parser.add_argument(
         "--aic-backend",
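
For reference, argparse maps the renamed flag --aic-hf-id to the aic_hf_id attribute used throughout this PR. An illustrative standalone parser, not the profiler's full argument set (the real default comes from the sweep config, as shown above):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--aic-hf-id",
    type=str,
    default=None,  # the profiler defaults to config.get("sweep", {}).get("aic_hf_id")
    help="aiconfigurator name of the target model (e.g. Qwen/Qwen3-32B)",
)
args = parser.parse_args(["--aic-hf-id", "Qwen/Qwen3-32B"])
assert args.aic_hf_id == "Qwen/Qwen3-32B"  # dashes become underscores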
2 changes: 1 addition & 1 deletion benchmarks/pyproject.toml
@@ -40,7 +40,7 @@ classifiers = [
 ]

 dependencies = [
-    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@11b6d821f1fbb34300bb0ed4945f647e89fb411a",
+    "aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759",
     "networkx",
     "pandas",
     "pydantic>=2",
2 changes: 1 addition & 1 deletion (file name not shown)

@@ -54,7 +54,7 @@ spec:
   # AI Configurator mode (fast simulation-based profiling, 20-30 seconds)
   use_ai_configurator: false  # Set to false for online profiling (2-4 hours)
   aic_system: h200_sxm  # Target GPU system for AI Configurator
-  aic_model_name: QWEN3_0.6B  # Model name for AI Configurator
+  aic_hf_id: Qwen/Qwen3-0.6B  # Model name for AI Configurator
   aic_backend_version: "0.20.0"  # Backend version for AI Configurator

   # SLA targets for profiling
4 changes: 2 additions & 2 deletions (file name not shown)

@@ -350,7 +350,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
 			"sweep": map[string]interface{}{
 				"use_ai_configurator": true,
 				"aic_system":          "h200_sxm",
-				"aic_model_name":      "QWEN3_32B",
+				"aic_hf_id":           "Qwen/Qwen3-32B",
 				"aic_backend_version": "0.20.0",
 			},
 		}),
@@ -1060,7 +1060,7 @@ var _ = Describe("DGDR Profiler Arguments", func() {
 			"sweep": map[string]interface{}{
 				"use_ai_configurator": true,
 				"aic_system":          "h200_sxm",
-				"aic_model_name":      "QWEN3_32B",
+				"aic_hf_id":           "Qwen/Qwen3-32B",
 				"aic_backend_version": "0.20.0",
 			},
 		}),
7 changes: 1 addition & 6 deletions docs/benchmarks/sla_driven_profiling.md
@@ -299,17 +299,12 @@ profilingConfig:
   sweep:
     use_ai_configurator: true
     aic_system: h200_sxm  # GPU system: h100_sxm, h200_sxm, b200_sxm, gb200_sxm, a100_sxm
-    aic_model_name: QWEN3_32B  # AIC model identifier (see supported list)
+    aic_hf_id: Qwen/Qwen3-32B  # AIC model identifier (see supported list)
     aic_backend_version: "0.20.0"  # TensorRT-LLM version: 0.20.0, 1.0.0rc3, 1.0.0rc6
 ```

 **Supported configurations:** See [AI Configurator documentation](https://github.com/ai-dynamo/aiconfigurator#supported-features)

-**Model name mapping examples:**
-- `Qwen/Qwen3-32B` → `QWEN3_32B`
-- `meta-llama/Llama-3.1-70B` → `LLAMA3.1_70B`
-- `deepseek-ai/DeepSeek-V3` → `DEEPSEEK_V3`
-
 ### Planner Configuration (Optional)

 Pass arguments to the SLA planner:
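
The deleted mapping list is the point of the rename: callers now pass the HuggingFace ID straight through instead of translating it to an enum-style aiconfigurator name. A before/after sketch of the sweep block, written as Python dicts purely for illustration (the real configuration is the YAML above):

# Before this PR: enum-style aiconfigurator model name.
old_sweep = {
    "use_ai_configurator": True,
    "aic_system": "h200_sxm",
    "aic_model_name": "QWEN3_32B",
    "aic_backend_version": "0.20.0",
}

# After this PR: the HuggingFace ID is used directly.
new_sweep = {
    "use_ai_configurator": True,
    "aic_system": "h200_sxm",
    "aic_hf_id": "Qwen/Qwen3-32B",
    "aic_backend_version": "0.20.0",
}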
2 changes: 1 addition & 1 deletion docs/planner/sla_planner_quickstart.md
@@ -230,7 +230,7 @@ sweep:
 sweep:
   use_ai_configurator: true
   aic_system: h200_sxm
-  aic_model_name: QWEN3_32B
+  aic_hf_id: Qwen/Qwen3-32B
   aic_backend_version: "0.20.0"

 ```
10 changes: 5 additions & 5 deletions tests/profiler/test_profile_sla_aiconfigurator.py
@@ -49,7 +49,7 @@ def __init__(self):
         self.dry_run = False
         self.use_ai_configurator = True
         self.aic_system = "h200_sxm"
-        self.aic_model_name = "QWEN3_32B"
+        self.aic_hf_id = "Qwen/Qwen3-32B"
         self.aic_backend = ""
         self.aic_backend_version = "0.20.0"
         self.num_gpus_per_node = 8
@@ -60,7 +60,7 @@
 @pytest.mark.pre_merge
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
-    "missing_arg", ["aic_system", "aic_model_name", "aic_backend_version"]
+    "missing_arg", ["aic_system", "aic_hf_id", "aic_backend_version"]
 )
 async def test_aiconfigurator_missing_args(self, trtllm_args, missing_arg):
     # Check that validation error happens when a required arg is missing.
@@ -99,12 +99,12 @@ async def test_trtllm_aiconfigurator_single_model(self, trtllm_args):
         ("trtllm", "1.0.0rc3"),
     ],
 )
-@pytest.mark.parametrize("model_name", ["QWEN3_32B", "LLAMA3.1_405B"])
+@pytest.mark.parametrize("hf_model_id", ["Qwen/Qwen3-32B", "meta-llama/Llama-3.1-405B"])
 async def test_trtllm_aiconfigurator_many(
-    self, trtllm_args, model_name, backend, aic_backend_version
+    self, trtllm_args, hf_model_id, backend, aic_backend_version
 ):
     # Test that profile_sla works with a variety of backend versions and model names.
-    trtllm_args.aic_model_name = model_name
+    trtllm_args.aic_hf_id = hf_model_id
     trtllm_args.backend = backend
     trtllm_args.aic_backend_version = aic_backend_version
     await run_profile(trtllm_args)
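
The stacked parametrize decorators above run a cross product: each (backend, version) pair is exercised against each HuggingFace ID. A self-contained sketch of the same shape; the ("trtllm", "0.20.0") pair is an assumption, since only "1.0.0rc3" is visible in this hunk:

import pytest

@pytest.mark.parametrize(
    "backend, aic_backend_version",
    [("trtllm", "0.20.0"), ("trtllm", "1.0.0rc3")],  # first pair assumed
)
@pytest.mark.parametrize(
    "hf_model_id", ["Qwen/Qwen3-32B", "meta-llama/Llama-3.1-405B"]
)
def test_matrix_shape(backend, aic_backend_version, hf_model_id):
    # 2 (backend, version) pairs x 2 model IDs = 4 generated test cases.
    assert "/" in hf_model_id  # HF IDs are org/name paths, unlike enum-style names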
14 changes: 7 additions & 7 deletions tests/profiler/test_profile_sla_dryrun.py
@@ -67,7 +67,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8
@@ -103,7 +103,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8
@@ -153,7 +153,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8
@@ -196,7 +196,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8
@@ -262,7 +262,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8  # Will be overridden by auto-generation
@@ -328,7 +328,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8  # Will be overridden by auto-generation
@@ -394,7 +394,7 @@ def __init__(self):
         self.dry_run = True
         self.use_ai_configurator = False
         self.aic_system = None
-        self.aic_model_name = None
+        self.aic_hf_id = None
         self.aic_backend = ""
         self.aic_backend_version = None
         self.num_gpus_per_node = 8  # Will be overridden by auto-generation
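
Since the same renamed attribute recurs in all seven dry-run fixtures above, a shared helper would shrink any future rename to a one-line change; a hypothetical sketch, not part of this PR:

def apply_aic_defaults(args):
    # Defaults for fixtures that do not exercise AI Configurator mode.
    args.use_ai_configurator = False
    args.aic_system = None
    args.aic_hf_id = None
    args.aic_backend = ""
    args.aic_backend_version = None
    return args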