Skip to content

Commit f3fe499

Browse files
committed
AI-Dynamo updates.
Signed-off-by: Kapil Arya <kapila@nvidia.com>
1 parent f5410ee commit f3fe499

File tree

13 files changed

+897
-237
lines changed

13 files changed

+897
-237
lines changed

src/cloudai/systems/kubernetes/kubernetes_system.py

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -296,25 +296,73 @@ def _run_genai_perf(self, job: KubernetesJob) -> None:
296296
raise TypeError("Test definition must be an instance of AIDynamoTestDefinition")
297297

298298
genai_perf_results_path = "/tmp/cloudai/genai-perf"
299+
frontend_pod = self._get_dynamo_pod_by_role(role="frontend")
300+
301+
# Copy wrapper script and calc_percentile_csv script to the pod
302+
wrapper_script_path = tdef.genai_perf_wrapper_script.installed_path
303+
calc_csv_script_path = tdef.calc_percentile_csv.installed_path
304+
305+
pod_wrapper_path = "/tmp/genai_perf_wrapper.sh"
306+
pod_calc_csv_path = "/tmp/calc_percentile_csv.py"
307+
308+
logging.debug(f"Copying wrapper script {wrapper_script_path} to pod {frontend_pod}")
309+
cp_wrapper_cmd = f"kubectl cp {wrapper_script_path} {self.default_namespace}/{frontend_pod}:{pod_wrapper_path}"
310+
subprocess.run(cp_wrapper_cmd, shell=True, capture_output=True, text=True, check=True)
311+
312+
logging.debug(f"Copying calc_percentile_csv script {calc_csv_script_path} to pod {frontend_pod}")
313+
cp_calc_cmd = f"kubectl cp {calc_csv_script_path} {self.default_namespace}/{frontend_pod}:{pod_calc_csv_path}"
314+
subprocess.run(cp_calc_cmd, shell=True, capture_output=True, text=True, check=True)
299315

300-
genai_perf_cmd = ["genai-perf", "profile", f"--artifact-dir={genai_perf_results_path}"]
316+
# Make wrapper script executable
317+
chmod_cmd = ["chmod", "+x", pod_wrapper_path]
318+
logging.debug(f"Making wrapper script executable in pod {frontend_pod}")
319+
try:
320+
lazy.k8s.stream.stream(
321+
self.core_v1.connect_get_namespaced_pod_exec,
322+
name=frontend_pod,
323+
namespace=self.default_namespace,
324+
command=chmod_cmd,
325+
stderr=True,
326+
stdin=False,
327+
stdout=True,
328+
tty=False,
329+
)
330+
except lazy.k8s.client.ApiException as e:
331+
logging.error(f"Error making wrapper script executable in pod '{frontend_pod}': {e}")
332+
333+
# Build genai-perf command arguments
334+
genai_perf_cmd_parts = ["genai-perf", "profile", f"--artifact-dir={genai_perf_results_path}"]
301335
for k, v in tdef.cmd_args.genai_perf.model_dump(
302336
exclude={"extra_args", "extra-args"}, exclude_none=True
303337
).items():
304-
genai_perf_cmd.append(f"--{k}={v}")
338+
genai_perf_cmd_parts.append(f"--{k}={v}")
305339
if extra_args := tdef.cmd_args.genai_perf.extra_args:
306-
genai_perf_cmd.extend(extra_args.split())
307-
logging.debug(f"GenAI perf arguments: {genai_perf_cmd=}")
308-
309-
frontend_pod = self._get_dynamo_pod_by_role(role="frontend")
310-
311-
logging.debug(f"Executing genai-perf in pod={frontend_pod} cmd={genai_perf_cmd}")
340+
genai_perf_cmd_parts.extend(extra_args.split())
341+
342+
# Build wrapper command with proper parameters
343+
report_file = "genai_perf_report.csv"
344+
wrapper_cmd = [
345+
"/bin/bash",
346+
pod_wrapper_path,
347+
"--result_dir",
348+
genai_perf_results_path,
349+
"--report_file",
350+
report_file,
351+
"--calc_percentile_csv_script",
352+
pod_calc_csv_path,
353+
"--gpus_per_node",
354+
str(self.gpus_per_node),
355+
"--",
356+
*genai_perf_cmd_parts,
357+
]
358+
359+
logging.debug(f"Executing genai-perf wrapper in pod={frontend_pod} cmd={wrapper_cmd}")
312360
try:
313361
genai_results = lazy.k8s.stream.stream(
314362
self.core_v1.connect_get_namespaced_pod_exec,
315363
name=frontend_pod,
316364
namespace=self.default_namespace,
317-
command=genai_perf_cmd,
365+
command=wrapper_cmd,
318366
stderr=True,
319367
stdin=False,
320368
stdout=True,
@@ -324,7 +372,7 @@ def _run_genai_perf(self, job: KubernetesJob) -> None:
324372
with (job.test_run.output_path / "genai_perf.log").open("w") as f:
325373
f.write(genai_results)
326374
except lazy.k8s.client.ApiException as e:
327-
logging.error(f"Error executing genai-perf command in pod '{frontend_pod}': {e}")
375+
logging.error(f"Error executing genai-perf wrapper command in pod '{frontend_pod}': {e}")
328376

329377
cp_logs_cmd = " ".join(
330378
[

src/cloudai/workloads/ai_dynamo/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
AIDynamoCmdArgs,
2020
AIDynamoTestDefinition,
2121
DecodeWorkerArgs,
22-
GenAIPerfArgs,
22+
GenAIPerf,
2323
PrefillWorkerArgs,
2424
)
2525
from .kubernetes_json_gen_strategy import AIDynamoKubernetesJsonGenStrategy
@@ -34,6 +34,6 @@
3434
"AIDynamoSlurmCommandGenStrategy",
3535
"AIDynamoTestDefinition",
3636
"DecodeWorkerArgs",
37-
"GenAIPerfArgs",
37+
"GenAIPerf",
3838
"PrefillWorkerArgs",
3939
]

src/cloudai/workloads/ai_dynamo/ai_dynamo.py

Lines changed: 141 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,39 @@
2626
from .report_generation_strategy import CSV_FILES_PATTERN, JSON_FILES_PATTERN
2727

2828

29+
class BenchmarkArgs(BaseModel):
    """Open-ended argument container for a custom benchmark.

    Undeclared keys are accepted (``extra="allow"``). The single declared
    field, ``extra_args``, can be populated under either ``extra-args`` or
    ``extra_args`` and carries ``extra-args`` as its serialization alias.
    """

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    # A single string or a list of strings; ``None`` when nothing extra is given.
    extra_args: str | list[str] | None = Field(
        validation_alias=AliasChoices("extra-args", "extra_args"),
        serialization_alias="extra-args",
        default=None,
    )
39+
40+
41+
class Benchmark(BaseModel):
    """Configuration of a single benchmark.

    ``name`` and ``cmd`` are required here (subclasses may give ``cmd`` a
    default). Undeclared keys are accepted (``extra="allow"``).
    """

    model_config = ConfigDict(populate_by_name=True, extra="allow")

    name: str
    cmd: str
    repo: Optional[GitRepo] = None
    enabled: bool = False  # defaults to off
    args: Optional[BenchmarkArgs] = None
    # Accepted as ``extra-args`` or ``extra_args``; serialization alias is ``extra-args``.
    extra_args: str | list[str] | None = Field(
        validation_alias=AliasChoices("extra-args", "extra_args"),
        serialization_alias="extra-args",
        default=None,
    )

    @property
    def report_name(self) -> str:
        """Return the CSV report file name, built as ``<name>_report.csv``."""
        return f"{self.name}_report.csv"
60+
61+
2962
class WorkerBaseArgs(BaseModel):
3063
"""Base arguments for VLLM workers."""
3164

@@ -78,10 +111,11 @@ class DecodeWorkerArgs(WorkerBaseArgs):
78111
class AIDynamoArgs(BaseModel):
79112
"""Arguments for AI Dynamo setup."""
80113

81-
model_config = ConfigDict(extra="allow")
114+
model_config = ConfigDict(extra="allow", populate_by_name=True)
82115

83116
model: str = "Qwen/Qwen3-0.6B"
84117
backend: str = "vllm"
118+
connector: Optional[str] = None # none, lmcache, kvbm
85119
workspace_path: str = Field(
86120
default="/workspace",
87121
serialization_alias="workspace-path",
@@ -95,32 +129,92 @@ class AIDynamoArgs(BaseModel):
95129
)
96130
prefill_worker: PrefillWorkerArgs | None = None
97131
prefill_cmd: str = Field(
98-
default="python3 -m dynamo.vllm",
132+
default="python3 -m dynamo.vllm --is-prefill-worker",
99133
serialization_alias="prefill-cmd",
100134
validation_alias=AliasChoices("prefill-cmd", "prefill_cmd"),
101135
)
102136

103137

104-
class GenAIPerfArgs(BaseModel):
105-
"""Arguments for GenAI performance profiling."""
138+
class LMCacheArgs(BaseModel):
    """Tunable settings for LMCache; undeclared keys are accepted as well."""

    model_config = ConfigDict(extra="allow")

    # --- cache / NIXL settings ---
    chunk_size: int = 256
    local_cpu: bool = False
    nixl_buffer_size: int = 10 * 1024**3  # 10737418240 bytes
    nixl_buffer_device: str = "cuda"
    extra_config_enable_nixl_storage: bool = True
    extra_config_nixl_backend: str = "GDS_MT"
    extra_config_nixl_file_pool_size: int = 64
    # NOTE(review): "%CACHEDIR%" looks like a placeholder substituted elsewhere — confirm.
    extra_config_nixl_path: str = "%CACHEDIR%"

    # --- LMCache controller settings ---
    enable_controller: bool = True
    lmcache_instance_id: str = "lmcache_default_instance"
    controller_url: str = "localhost:9001"
    lmcache_worker_port: int = 8788
    distributed_url: str = "localhost:8789"
158+
159+
160+
class LMCache(BaseModel):
    """Top-level LMCache configuration: controller command, source repo and arguments.

    Undeclared keys are accepted (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    controller_cmd: str = "lmcache_controller --host localhost --port 9000 --monitor-port 9001"
    # NOTE(review): this default uses an SSH-style URL; confirm that is intended.
    repo: Optional[GitRepo] = GitRepo(
        commit="ab8530993992db873869ba882320953582d94309",
        url="git@github.com:LMCache/LMCache.git",
    )

    args: Optional[LMCacheArgs] = None
    # Accepted as ``extra-args`` or ``extra_args``; serialization alias is ``extra-args``.
    extra_args: str | list[str] | None = Field(
        validation_alias=AliasChoices("extra-args", "extra_args"),
        serialization_alias="extra-args",
        default=None,
    )
)
114176

115177

178+
class GenAIPerf(Benchmark):
    """``Benchmark`` specialization whose ``cmd`` defaults to ``genai-perf profile``."""

    model_config = ConfigDict(extra="allow")

    # Overrides the required ``cmd`` of ``Benchmark`` with a default value.
    cmd: str = "genai-perf profile"
184+
185+
186+
class LMBench(Benchmark):
    """``Benchmark`` specialization for LMBench with a default repo and command."""

    model_config = ConfigDict(extra="allow")

    # NOTE(review): this default uses an SSH-style URL; confirm that is intended.
    repo: Optional[GitRepo] = GitRepo(
        commit="e1406623c5e88878cf2b7fbd64fe6c47f7dcb66f",
        url="git@github.com:LMCache/LMBenchmark.git",
    )

    # Overrides the required ``cmd`` of ``Benchmark`` with a default value.
    cmd: str = "python3 ./synthetic-multi-round-qa/multi-round-qa.py"
196+
197+
198+
class Constraints(BaseModel):
    """Switches selecting which AI Dynamo configuration constraints are validated under DSE.

    Every switch defaults to ``True``; undeclared keys are accepted.
    """

    model_config = ConfigDict(extra="allow")

    # prefill tensor-parallel size <= decode tensor-parallel size
    prefill_tp_le_decode_tp: bool = True
    # tensor-parallel size * pipeline-parallel size <= GPUs per node
    tp_times_pp_le_gpus_per_node: bool = True
    # prefill nodes + decode nodes <= total nodes
    prefill_decode_nodes_le_total_nodes: bool = True
206+
207+
116208
class AIDynamoCmdArgs(CmdArgs):
    """Command arguments of the AI Dynamo workload.

    Groups the container image, the dynamo settings, the LMCache settings and
    the benchmark configurations.
    """

    # Required: container image to run.
    docker_image_url: str
    storage_cache_dir: Optional[str] = None

    # Required sections.
    dynamo: AIDynamoArgs
    # NOTE(review): typed as ``LMCacheArgs`` rather than ``LMCache`` — confirm this is intended.
    lmcache: LMCacheArgs
    genai_perf: GenAIPerf
    lmbench: LMBench

    # Optional user-defined benchmark.
    custom_bench: Optional[Benchmark] = None
124218

125219

126220
class AIDynamoTestDefinition(TestDefinition):
@@ -129,10 +223,14 @@ class AIDynamoTestDefinition(TestDefinition):
129223
cmd_args: AIDynamoCmdArgs
130224
_docker_image: Optional[DockerImage] = None
131225
script: File = File(Path(__file__).parent.parent / "ai_dynamo/ai_dynamo.sh")
226+
genai_perf_wrapper_script: File = File(Path(__file__).parent.parent / "ai_dynamo/genai_perf_wrapper.sh")
227+
calc_percentile_csv: File = File(Path(__file__).parent.parent / "ai_dynamo/calc_percentile_csv.py")
132228
dynamo_repo: GitRepo = GitRepo(
133229
url="https://github.com/ai-dynamo/dynamo.git", commit="f7e468c7e8ff0d1426db987564e60572167e8464"
134230
)
135231
_hf_model: HFModel | None = None
232+
benchmarks: str = "genai_perf"
233+
constraints: Constraints = Constraints()
136234

137235
@property
138236
def docker_image(self) -> DockerImage:
@@ -148,7 +246,14 @@ def hf_model(self) -> HFModel:
148246

149247
@property
def installables(self) -> list[Installable]:
    """Return every item this test definition needs installed, in a fixed order."""
    items: list[Installable] = [self.docker_image, self.script, self.genai_perf_wrapper_script]
    items += [self.dynamo_repo, self.hf_model, self.calc_percentile_csv]
    return items
152257

153258
def was_run_successful(self, tr: TestRun) -> JobStatusResult:
154259
output_path = tr.output_path
@@ -159,3 +264,30 @@ def was_run_successful(self, tr: TestRun) -> JobStatusResult:
159264
if not has_results:
160265
return JobStatusResult(False, "No result files found in the output directory.")
161266
return JobStatusResult(True)
267+
268+
def constraint_check(self, tr: TestRun) -> bool:
    """Validate the dynamo worker configuration against the enabled constraints.

    Each constraint is evaluated only when its switch in ``self.constraints``
    is set. Evaluation stops at the first violated constraint.

    ``dynamo.prefill_worker`` is optional. When it is ``None`` the two checks
    that only concern the prefill worker are skipped, and the node-count check
    treats the prefill worker as occupying zero nodes.

    Args:
        tr: The test run being validated. Currently unused.

    Returns:
        ``True`` when every enabled constraint holds, ``False`` otherwise.
    """
    # NOTE(review): ``tr`` is unused; ``gpus_per_node`` and ``num_nodes`` are read from
    # ``cmd_args`` although they are not declared on it in the visible code — confirm the source.
    dynamo = self.cmd_args.dynamo
    prefill = dynamo.prefill_worker  # may be None when no prefill worker is configured

    if self.constraints.prefill_tp_le_decode_tp and prefill is not None:
        prefill_tp = prefill.tensor_parallel_size
        decode_tp = dynamo.decode_worker.tensor_parallel_size
        if prefill_tp > decode_tp:
            logging.info("constraint_check failed for: prefill_tp_le_decode_tp")
            return False
        logging.info("constraint_check passed for: prefill_tp_le_decode_tp")

    if self.constraints.tp_times_pp_le_gpus_per_node and prefill is not None:
        tp = prefill.tensor_parallel_size
        pp = prefill.pipeline_parallel_size
        if tp * pp > self.cmd_args.gpus_per_node:
            logging.info("constraint_check failed for: tp_times_pp_le_gpus_per_node")
            return False
        logging.info("constraint_check passed for: tp_times_pp_le_gpus_per_node")

    if self.constraints.prefill_decode_nodes_le_total_nodes:
        prefill_nodes = prefill.num_nodes if prefill is not None else 0
        decode_nodes = dynamo.decode_worker.num_nodes
        if prefill_nodes + decode_nodes > self.cmd_args.num_nodes:
            logging.info("constraint_check failed for: prefill_decode_nodes_le_total_nodes")
            return False
        logging.info("constraint_check passed for: prefill_decode_nodes_le_total_nodes")

    return True

0 commit comments

Comments
 (0)