Skip to content

Commit a3d5b48

Browse files
committed
remove unused code
Signed-off-by: chickeyton <[email protected]>
1 parent 17030f5 commit a3d5b48

File tree

2 files changed

+8
-27
lines changed

2 files changed

+8
-27
lines changed

src/vllm_router/routers/routing_logic.py

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -546,9 +546,7 @@ async def route_request(
546546
msg = FullLookupMsg(event_id="", tokens=token_ids)
547547
ret_msg = await self.kv_manager.handle_orchestration_message(msg)
548548
matched_infos = ret_msg.matched_info
549-
# print(f">>>>>>>>>>>>>>>>>>>>>>> matched_infos={matched_infos}")
550549
best_matched_info = self._find_best_matched(matched_infos)
551-
# print(f">>>>>>>>>>>>>>>>>>>>>>> best_matched_info={best_matched_info}")
552550
self.uncached_prefix_tokens = len(token_ids) - best_matched_info[1][-1][1]
553551
best_ttft_url = await self._find_best_ttft(endpoints, matched_infos,
554552
best_matched_info, request_stats)
@@ -586,7 +584,7 @@ async def _find_best_ttft(self, endpoints, matched_infos, best_matched_info,
586584
best_ttft = float('inf')
587585
best_ttft_url = None
588586
for i, matched_info in enumerate(matched_infos):
589-
print(f"-------------- URL:{matched_urls[i]} --------------")
587+
logger.debug(f"-------------- URL:{matched_urls[i]} --------------")
590588
ttft = self._estimate_ttft(matched_info, best_matched_info,
591589
matched_stats[i])
592590
if best_ttft_url is None or ttft <= best_ttft:
@@ -601,7 +599,7 @@ async def _find_best_ttft(self, endpoints, matched_infos, best_matched_info,
601599
stats = request_stats.get(url, None)
602600
if stats is None:
603601
raise ValueError(f"{url} provides no request stats ")
604-
print(f"-------------- URL:{url} --------------")
602+
logger.debug(f"-------------- URL:{url} --------------")
605603
ttft = self._estimate_ttft(None, best_matched_info, stats)
606604
if best_ttft_url is None or ttft <= best_ttft:
607605
best_ttft = ttft
@@ -621,12 +619,12 @@ def _estimate_ttft(self, matched_info, best_matched_info, stats):
621619
stats.engine_prefill_tps)
622620
ttft = forecasted_queue_time + transfer_time
623621

624-
print(f"-------------- time estimations --------------")
625-
print(f"uncomputed_prefix_tokens: {stats.uncomputed_prefix_tokens}")
626-
print(f"engine_prefill_tps: {stats.engine_prefill_tps}")
627-
print(f"transfer_time: {transfer_time}")
628-
print(f"forecasted_queue_time: {forecasted_queue_time}")
629-
print(f"ttft: {ttft}")
622+
logger.debug(f"-------------- time estimations --------------")
623+
logger.debug(f"uncomputed_prefix_tokens: {stats.uncomputed_prefix_tokens}")
624+
logger.debug(f"engine_prefill_tps: {stats.engine_prefill_tps}")
625+
logger.debug(f"transfer_time: {transfer_time}")
626+
logger.debug(f"forecasted_queue_time: {forecasted_queue_time}")
627+
logger.debug(f"ttft: {ttft}")
630628
return ttft
631629

632630
async def _get_instance_url(self, endpoints, instance_id):
@@ -649,11 +647,8 @@ async def _get_instance_url(self, endpoints, instance_id):
649647
return url
650648

651649
def _calc_transfer_time(self, matched_info, best_matched_info):
652-
#print(f"matched_info[1][-1][1]: {matched_info[1][-1][1]}")
653650
transfer_time = 0
654651
for chunk in best_matched_info[1]:
655-
#print(f"chunk[0]: {chunk[0]}")
656-
#print(f"chunk[1]: {chunk[1]}")
657652
if matched_info is not None and chunk[1] <= matched_info[1][-1][1]:
658653
continue
659654
# TODO better estimations
@@ -663,7 +658,6 @@ def _calc_transfer_time(self, matched_info, best_matched_info):
663658
transfer_time += 0.015
664659
else:
665660
transfer_time += 0.01
666-
#print(f"transfer_time: {transfer_time}")
667661
return transfer_time
668662

669663
def _fallback_routing(self, endpoints, request_stats, request):

src/vllm_router/stats/request_stats.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,6 @@ def on_new_request(self, engine_url: str, request_id: str, timestamp: float, unc
193193
timestamp: the timestamp when the request was created
194194
uncached_prefix_tokens: The number of uncached prefix tokens
195195
"""
196-
#print(f"************************* on_new_request *************************")
197-
#print(f"engine_url:{engine_url} request_id:{request_id} timestamp:{timestamp}")
198-
#print(f"uncached_prefix_tokens:{uncached_prefix_tokens}")
199196
self.request_start_time[(engine_url, request_id)] = timestamp
200197

201198
if uncached_prefix_tokens is not None:
@@ -341,11 +338,9 @@ def get_request_stats(self, current_time: float, urls: List[str] = None) -> Dict
341338
else:
342339
swapped = 0
343340

344-
#print(f"&&&&&&&&&&&&&& uncached_prefix_tokens:{self.uncached_prefix_tokens}")
345341

346342
engine_prefill_tps = self._calc_engine_prefill_tps(current_time, engine_url)
347343
uncomputed_prefix_tokens = self._get_uncomputed_prefix_tokens(engine_url)
348-
# forecasted_queue_time = self._forecast_queue_time(engine_url, engine_prefill_tps)
349344

350345
ret[engine_url] = RequestStats(
351346
qps=qps,
@@ -366,30 +361,22 @@ def get_request_stats(self, current_time: float, urls: List[str] = None) -> Dict
366361
return ret
367362

368363
def _calc_engine_prefill_tps(self, current_time: float, engine_url: str) -> float:
369-
#print(f"**************_calc_engine_prefill_tps**************")
370-
#print(f"engine_url:{engine_url} current_time:f{current_time} sliding_window_size:f{self.sliding_window_size}")
371364
min_start_time = current_time - self.sliding_window_size
372-
#print(f"min_start_time:{min_start_time}")
373365
prefill_periods = TimePeriods()
374366
all_uncached_prefix_tokens = 0
375367
for (url, request_id), start_time in self.request_start_time.items():
376-
#print(f"url:{url} request_id:{request_id} start_time:{start_time}")
377368
if url != engine_url or start_time < min_start_time:
378-
# print(f"skip 1 |{url != engine_url}|{start_time < min_start_time}")
379369
continue
380370
if ((url, request_id) not in self.first_token_time or
381371
(url, request_id) not in self.uncached_prefix_tokens):
382-
# print(f"skip 2 |{(url, request_id) not in self.first_token_time}|{(url, request_id) not in self.uncached_prefix_tokens}")
383372
continue
384373

385374
uncached_prefix_tokens = self.uncached_prefix_tokens[(url, request_id)]
386375
if uncached_prefix_tokens > 0:
387376
prefill_periods.union(start_time, self.first_token_time[(url, request_id)])
388377
all_uncached_prefix_tokens += uncached_prefix_tokens
389-
#print(f"[[[[[[[[[[[[[[[[[[[[[[[[[ all_uncached_prefix_tokens:{all_uncached_prefix_tokens}")
390378

391379
length = prefill_periods.compute_length()
392-
#print(f"all_uncached_prefix_tokens:{all_uncached_prefix_tokens} prefill_periods length:{length}")
393380
if length > 0:
394381
return all_uncached_prefix_tokens / length
395382
return -1

0 commit comments

Comments
 (0)