@@ -189,12 +189,12 @@ def main(args_in: list[str] | None = None) -> None:
189189 "pp" : {
190190 "p95" : round (data ['metrics' ]["llamacpp_prompt_processing_second" ]["p(95)" ], 2 ),
191191 "avg" : round (data ['metrics' ]["llamacpp_prompt_processing_second" ]["avg" ], 2 ),
192- "0" : round (mean (prometheus_metrics ['prompt_tokens_seconds' ]), 2 ),
192+ "0" : round (mean (prometheus_metrics ['prompt_tokens_seconds' ]), 2 ) if 'prompt_tokens_seconds' in prometheus_metrics else 0 ,
193193 },
194194 "tg" : {
195195 "p95" : round (data ['metrics' ]["llamacpp_tokens_second" ]["p(95)" ], 2 ),
196196 "avg" : round (data ['metrics' ]["llamacpp_tokens_second" ]["avg" ], 2 ),
197- "0" : round (mean (prometheus_metrics ['predicted_tokens_seconds' ]), 2 ),
197+ "0" : round (mean (prometheus_metrics ['predicted_tokens_seconds' ]), 2 ) if 'predicted_tokens_seconds' in prometheus_metrics else 0 ,
198198 },
199199 }
200200 with open ("results.github.env" , 'a' ) as github_env :
@@ -234,7 +234,7 @@ def start_server(args):
234234 server_process = start_server_background (args )
235235
236236 attempts = 0
237- max_attempts = 20
237+ max_attempts = 600
238238 if 'GITHUB_ACTIONS' in os .environ :
239239 max_attempts *= 2
240240
@@ -245,7 +245,15 @@ def start_server(args):
245245 print (f"bench: waiting for server to start ..." )
246246 time .sleep (0.5 )
247247
248- print ("bench: server started." )
248+ attempts = 0
249+ while not is_server_ready (args .host , args .port ):
250+ attempts += 1
251+ if attempts > max_attempts :
252+ assert False , "server not ready"
253+ print (f"bench: waiting for server to be ready ..." )
254+ time .sleep (0.5 )
255+
256+ print ("bench: server started and ready." )
249257 return server_process
250258
251259
@@ -258,11 +266,6 @@ def start_server_background(args):
258266 '--host' , args .host ,
259267 '--port' , args .port ,
260268 ]
261- model_file = args .model_path_prefix + os .path .sep + args .hf_file
262- model_dir = os .path .dirname (model_file )
263- if not os .path .exists (model_dir ):
264- os .makedirs (model_dir )
265- server_args .extend (['--model' , model_file ])
266269 server_args .extend (['--hf-repo' , args .hf_repo ])
267270 server_args .extend (['--hf-file' , args .hf_file ])
268271 server_args .extend (['--n-gpu-layers' , args .n_gpu_layers ])
@@ -306,6 +309,12 @@ def is_server_listening(server_fqdn, server_port):
306309 return _is_server_listening
307310
308311
def is_server_ready(server_fqdn, server_port):
    """Report whether the server's ``/health`` endpoint answers with HTTP 200.

    Args:
        server_fqdn: Host name or address the server is bound to.
        server_port: TCP port the server is bound to.

    Returns:
        True when ``GET /health`` returns status code 200. False for any
        other status code, or when the request fails or times out, so the
        caller can keep polling instead of crashing or hanging.
    """
    url = f"http://{server_fqdn}:{server_port}/health"
    try:
        # Without a timeout, requests waits indefinitely on a stalled server,
        # which would defeat the caller's bounded retry loop.
        response = requests.get(url, timeout=5)
    except requests.exceptions.RequestException:
        # Connection refused/reset or timeout: treat as "not ready yet".
        return False
    return response.status_code == 200
316+
317+
def escape_metric_name(metric_name):
    """Upper-case ``metric_name`` and replace every character outside A-Z / 0-9 with ``_``."""
    upper_name = metric_name.upper()
    return re.sub('[^A-Z0-9]', '_', upper_name)
311320
0 commit comments