2626from .report_generation_strategy import CSV_FILES_PATTERN , JSON_FILES_PATTERN
2727
2828
class BenchmarkArgs(BaseModel):
    """Arguments for custom benchmarks."""

    # Unknown keys are kept (extra="allow") so benchmark-specific settings
    # pass through; populate_by_name lets callers use the Python field name.
    model_config = ConfigDict(extra="allow", populate_by_name=True)

    # Accepts "extra-args" (config style) or "extra_args" (Python style) on
    # input; always serialized back out as "extra-args".
    extra_args: str | list[str] | None = Field(
        default=None,
        serialization_alias="extra-args",
        validation_alias=AliasChoices("extra-args", "extra_args"),
    )
39+
40+
class Benchmark(BaseModel):
    """A single benchmark definition: a named command with optional repo and args.

    Note: the original docstring ("Arguments for custom benchmarks.") was
    copy-pasted from BenchmarkArgs; this class models the benchmark itself.
    """

    model_config = ConfigDict(extra="allow", populate_by_name=True)

    name: str  # used to derive the report file name
    cmd: str  # command line that launches the benchmark
    repo: Optional[GitRepo] = None  # source repo to clone, if any
    enabled: bool = False
    args: Optional[BenchmarkArgs] = None
    # Accepts "extra-args" or "extra_args" on input; serialized as "extra-args".
    extra_args: str | list[str] | None = Field(
        default=None,
        serialization_alias="extra-args",
        validation_alias=AliasChoices("extra-args", "extra_args"),
    )

    @property
    def report_name(self) -> str:
        """CSV report file name derived from the benchmark's name."""
        return f"{self.name}_report.csv"
60+
61+
2962class WorkerBaseArgs (BaseModel ):
3063 """Base arguments for VLLM workers."""
3164
@@ -78,10 +111,11 @@ class DecodeWorkerArgs(WorkerBaseArgs):
78111class AIDynamoArgs (BaseModel ):
79112 """Arguments for AI Dynamo setup."""
80113
81- model_config = ConfigDict (extra = "allow" )
114+ model_config = ConfigDict (extra = "allow" , populate_by_name = True )
82115
83116 model : str = "Qwen/Qwen3-0.6B"
84117 backend : str = "vllm"
118+ connector : Optional [str ] = None # none, lmcache, kvbm
85119 workspace_path : str = Field (
86120 default = "/workspace" ,
87121 serialization_alias = "workspace-path" ,
@@ -95,32 +129,92 @@ class AIDynamoArgs(BaseModel):
95129 )
96130 prefill_worker : PrefillWorkerArgs | None = None
97131 prefill_cmd : str = Field (
98- default = "python3 -m dynamo.vllm" ,
132+ default = "python3 -m dynamo.vllm --is-prefill-worker " ,
99133 serialization_alias = "prefill-cmd" ,
100134 validation_alias = AliasChoices ("prefill-cmd" , "prefill_cmd" ),
101135 )
102136
103137
104- class GenAIPerfArgs (BaseModel ):
105- """Arguments for GenAI performance profiling."""
class LMCacheArgs(BaseModel):
    """Arguments for LMCache."""

    model_config = ConfigDict(extra="allow")

    # KV-cache chunking and NIXL transfer-buffer settings.
    chunk_size: int = 256
    local_cpu: bool = False
    nixl_buffer_size: int = 10737418240  # 10 GiB
    nixl_buffer_device: str = "cuda"
    extra_config_enable_nixl_storage: bool = True
    extra_config_nixl_backend: str = "GDS_MT"
    extra_config_nixl_file_pool_size: int = 64
    # "%CACHEDIR%" is a placeholder token — presumably substituted with the
    # real cache directory at launch time; verify against the run script.
    extra_config_nixl_path: str = "%CACHEDIR%"

    # LMCache controller configuration
    enable_controller: bool = True
    lmcache_instance_id: str = "lmcache_default_instance"
    controller_url: str = "localhost:9001"
    lmcache_worker_port: int = 8788
    distributed_url: str = "localhost:8789"
158+
159+
class LMCache(BaseModel):
    """LMCache configuration: controller command, source repo, and arguments."""

    # populate_by_name=True added for consistency with the other models in
    # this file (BenchmarkArgs, Benchmark, AIDynamoArgs), since extra_args
    # below is alias-driven. Backward compatible: AliasChoices already
    # accepted both spellings on input.
    model_config = ConfigDict(extra="allow", populate_by_name=True)

    controller_cmd: str = "lmcache_controller --host localhost --port 9000 --monitor-port 9001"
    # Pinned commit for reproducible installs.
    repo: Optional[GitRepo] = GitRepo(
        url="git@github.com:LMCache/LMCache.git", commit="ab8530993992db873869ba882320953582d94309"
    )

    args: Optional[LMCacheArgs] = None
    # Accepts "extra-args" or "extra_args" on input; serialized as "extra-args".
    extra_args: str | list[str] | None = Field(
        default=None,
        serialization_alias="extra-args",
        validation_alias=AliasChoices("extra-args", "extra_args"),
    )
114176
115177
class GenAIPerf(Benchmark):
    """Benchmark configuration for GenAI performance profiling."""

    model_config = ConfigDict(extra="allow")

    # Overrides the required Benchmark.cmd with the genai-perf CLI entry point.
    cmd: str = "genai-perf profile"
184+
185+
class LMBench(Benchmark):
    """Benchmark configuration for LMBench."""

    model_config = ConfigDict(extra="allow")

    # Pinned commit for reproducible installs.
    repo: Optional[GitRepo] = GitRepo(
        url="git@github.com:LMCache/LMBenchmark.git", commit="e1406623c5e88878cf2b7fbd64fe6c47f7dcb66f"
    )

    # Overrides the required Benchmark.cmd with the multi-round QA driver.
    cmd: str = "python3 ./synthetic-multi-round-qa/multi-round-qa.py"
196+
197+
class Constraints(BaseModel):
    """Constraints for validation of AI Dynamo configurations when using DSE."""

    model_config = ConfigDict(extra="allow")

    # Each flag enables the correspondingly named check in
    # AIDynamoTestDefinition.constraint_check; all checks are on by default.
    prefill_tp_le_decode_tp: bool = True
    tp_times_pp_le_gpus_per_node: bool = True
    prefill_decode_nodes_le_total_nodes: bool = True
206+
207+
class AIDynamoCmdArgs(CmdArgs):
    """Arguments for AI Dynamo."""

    docker_image_url: str
    storage_cache_dir: Optional[str] = None
    dynamo: AIDynamoArgs
    # NOTE(review): typed as LMCacheArgs while genai_perf/lmbench use their
    # wrapper classes (GenAIPerf/LMBench). If the LMCache wrapper (with
    # controller_cmd/repo/args) was intended here, this should be LMCache —
    # confirm against the config schema before changing.
    lmcache: LMCacheArgs
    genai_perf: GenAIPerf
    lmbench: LMBench
    custom_bench: Optional[Benchmark] = None
124218
125219
126220class AIDynamoTestDefinition (TestDefinition ):
@@ -129,10 +223,14 @@ class AIDynamoTestDefinition(TestDefinition):
129223 cmd_args : AIDynamoCmdArgs
130224 _docker_image : Optional [DockerImage ] = None
131225 script : File = File (Path (__file__ ).parent .parent / "ai_dynamo/ai_dynamo.sh" )
226+ genai_perf_wrapper_script : File = File (Path (__file__ ).parent .parent / "ai_dynamo/genai_perf_wrapper.sh" )
227+ calc_percentile_csv : File = File (Path (__file__ ).parent .parent / "ai_dynamo/calc_percentile_csv.py" )
132228 dynamo_repo : GitRepo = GitRepo (
133229 url = "https://github.com/ai-dynamo/dynamo.git" , commit = "f7e468c7e8ff0d1426db987564e60572167e8464"
134230 )
135231 _hf_model : HFModel | None = None
232+ benchmarks : str = "genai_perf"
233+ constraints : Constraints = Constraints ()
136234
137235 @property
138236 def docker_image (self ) -> DockerImage :
@@ -148,7 +246,14 @@ def hf_model(self) -> HFModel:
148246
149247 @property
150248 def installables (self ) -> list [Installable ]:
151- return [self .docker_image , self .script , self .dynamo_repo , self .hf_model ]
249+ return [
250+ self .docker_image ,
251+ self .script ,
252+ self .genai_perf_wrapper_script ,
253+ self .dynamo_repo ,
254+ self .hf_model ,
255+ self .calc_percentile_csv ,
256+ ]
152257
153258 def was_run_successful (self , tr : TestRun ) -> JobStatusResult :
154259 output_path = tr .output_path
@@ -159,3 +264,30 @@ def was_run_successful(self, tr: TestRun) -> JobStatusResult:
159264 if not has_results :
160265 return JobStatusResult (False , "No result files found in the output directory." )
161266 return JobStatusResult (True )
267+
268+ def constraint_check (self , tr : TestRun ) -> bool :
269+ if self .constraints .prefill_tp_le_decode_tp :
270+ prefill_tp = self .cmd_args .dynamo .prefill_worker .tensor_parallel_size
271+ decode_tp = self .cmd_args .dynamo .decode_worker .tensor_parallel_size
272+ if prefill_tp > decode_tp :
273+ logging .info ("constraint_check failed for: prefill_tp_le_decode_tp" )
274+ return False
275+ logging .info ("constraint_check passed for: prefill_tp_le_decode_tp" )
276+
277+ if self .constraints .tp_times_pp_le_gpus_per_node :
278+ tp = self .cmd_args .dynamo .prefill_worker .tensor_parallel_size
279+ pp = self .cmd_args .dynamo .prefill_worker .pipeline_parallel_size
280+ if tp * pp > self .cmd_args .gpus_per_node :
281+ logging .info ("constraint_check failed for: tp_times_pp_le_gpus_per_node" )
282+ return False
283+ logging .info ("constraint_check passed for: tp_times_pp_le_gpus_per_node" )
284+
285+ if self .constraints .prefill_decode_nodes_le_total_nodes :
286+ prefill_nodes = self .cmd_args .dynamo .prefill_worker .num_nodes
287+ decode_nodes = self .cmd_args .dynamo .decode_worker .num_nodes
288+ if prefill_nodes + decode_nodes > self .cmd_args .num_nodes :
289+ logging .info ("constraint_check failed for: prefill_decode_nodes_le_total_nodes" )
290+ return False
291+ logging .info ("constraint_check passed for: prefill_decode_nodes_le_total_nodes" )
292+
293+ return True
0 commit comments