Remove commented-out debug prints and route remaining prints through logger.debug

chickeyton · chickeyton · commit a3d5b487e989 · 2025-08-29T17:45:32.000+08:00
Signed-off-by: chickeyton <ngton2014@gmail.com>
diff --git a/src/vllm_router/routers/routing_logic.py b/src/vllm_router/routers/routing_logic.py
@@ -546,9 +546,7 @@ async def route_request(
             msg = FullLookupMsg(event_id="", tokens=token_ids)
             ret_msg = await self.kv_manager.handle_orchestration_message(msg)
             matched_infos = ret_msg.matched_info
-            # print(f">>>>>>>>>>>>>>>>>>>>>>> matched_infos={matched_infos}")
             best_matched_info = self._find_best_matched(matched_infos)
-            # print(f">>>>>>>>>>>>>>>>>>>>>>> best_matched_info={best_matched_info}")
             self.uncached_prefix_tokens = len(token_ids) - best_matched_info[1][-1][1]
             best_ttft_url = await self._find_best_ttft(endpoints, matched_infos,
                                                        best_matched_info, request_stats)
@@ -586,7 +584,7 @@ async def _find_best_ttft(self, endpoints, matched_infos, best_matched_info,
         best_ttft = float('inf')
         best_ttft_url = None
         for i, matched_info in enumerate(matched_infos):
-            print(f"-------------- URL:{matched_urls[i]} --------------")
+            logger.debug(f"-------------- URL:{matched_urls[i]} --------------")
             ttft = self._estimate_ttft(matched_info, best_matched_info,
                                        matched_stats[i])
             if best_ttft_url is None or ttft <= best_ttft:
@@ -601,7 +599,7 @@ async def _find_best_ttft(self, endpoints, matched_infos, best_matched_info,
             stats = request_stats.get(url, None)
             if stats is None:
                 raise ValueError(f"{url} provides no request stats ")
-            print(f"-------------- URL:{url} --------------")
+            logger.debug(f"-------------- URL:{url} --------------")
             ttft = self._estimate_ttft(None, best_matched_info, stats)
             if best_ttft_url is None or ttft <= best_ttft:
                 best_ttft = ttft
@@ -621,12 +619,12 @@ def _estimate_ttft(self, matched_info, best_matched_info, stats):
                                      stats.engine_prefill_tps)
         ttft = forecasted_queue_time + transfer_time
 
-        print(f"-------------- time estimations --------------")
-        print(f"uncomputed_prefix_tokens: {stats.uncomputed_prefix_tokens}")
-        print(f"engine_prefill_tps: {stats.engine_prefill_tps}")
-        print(f"transfer_time: {transfer_time}")
-        print(f"forecasted_queue_time: {forecasted_queue_time}")
-        print(f"ttft: {ttft}")
+        logger.debug(f"-------------- time estimations --------------")
+        logger.debug(f"uncomputed_prefix_tokens: {stats.uncomputed_prefix_tokens}")
+        logger.debug(f"engine_prefill_tps: {stats.engine_prefill_tps}")
+        logger.debug(f"transfer_time: {transfer_time}")
+        logger.debug(f"forecasted_queue_time: {forecasted_queue_time}")
+        logger.debug(f"ttft: {ttft}")
         return ttft
 
     async def _get_instance_url(self, endpoints, instance_id):
@@ -649,11 +647,8 @@ async def _get_instance_url(self, endpoints, instance_id):
         return url
 
     def _calc_transfer_time(self, matched_info, best_matched_info):
-        #print(f"matched_info[1][-1][1]: {matched_info[1][-1][1]}")
         transfer_time = 0
         for chunk in best_matched_info[1]:
-            #print(f"chunk[0]: {chunk[0]}")
-            #print(f"chunk[1]: {chunk[1]}")
             if matched_info is not None and chunk[1] <= matched_info[1][-1][1]:
                 continue
             # TODO better estimations
@@ -663,7 +658,6 @@ def _calc_transfer_time(self, matched_info, best_matched_info):
                 transfer_time += 0.015
             else:
                 transfer_time += 0.01
-            #print(f"transfer_time: {transfer_time}")
         return transfer_time
 
     def _fallback_routing(self, endpoints, request_stats, request):
diff --git a/src/vllm_router/stats/request_stats.py b/src/vllm_router/stats/request_stats.py
@@ -193,9 +193,6 @@ def on_new_request(self, engine_url: str, request_id: str, timestamp: float, unc
             timestamp: the timestamp when the request was created
             uncached_prefix_tokens: The number of uncached prefix tokens
         """
-        #print(f"************************* on_new_request *************************")
-        #print(f"engine_url:{engine_url} request_id:{request_id} timestamp:{timestamp}")
-        #print(f"uncached_prefix_tokens:{uncached_prefix_tokens}")
         self.request_start_time[(engine_url, request_id)] = timestamp
 
         if uncached_prefix_tokens is not None:
@@ -341,11 +338,9 @@ def get_request_stats(self, current_time: float, urls: List[str] = None) -> Dict
             else:
                 swapped = 0
 
-            #print(f"&&&&&&&&&&&&&& uncached_prefix_tokens:{self.uncached_prefix_tokens}")
 
             engine_prefill_tps = self._calc_engine_prefill_tps(current_time, engine_url)
             uncomputed_prefix_tokens = self._get_uncomputed_prefix_tokens(engine_url)
-            # forecasted_queue_time = self._forecast_queue_time(engine_url, engine_prefill_tps)
 
             ret[engine_url] = RequestStats(
                 qps=qps,
@@ -366,30 +361,22 @@ def get_request_stats(self, current_time: float, urls: List[str] = None) -> Dict
         return ret
 
     def _calc_engine_prefill_tps(self, current_time: float, engine_url: str) -> float:
-        #print(f"**************_calc_engine_prefill_tps**************")
-        #print(f"engine_url:{engine_url} current_time:f{current_time} sliding_window_size:f{self.sliding_window_size}")
         min_start_time = current_time - self.sliding_window_size
-        #print(f"min_start_time:{min_start_time}")
         prefill_periods = TimePeriods()
         all_uncached_prefix_tokens = 0
         for (url, request_id), start_time in self.request_start_time.items():
-            #print(f"url:{url} request_id:{request_id} start_time:{start_time}")
             if url != engine_url or start_time < min_start_time:
-                # print(f"skip 1 |{url != engine_url}|{start_time < min_start_time}")
                 continue
             if ((url, request_id) not in self.first_token_time or
                     (url, request_id) not in self.uncached_prefix_tokens):
-                # print(f"skip 2 |{(url, request_id) not in self.first_token_time}|{(url, request_id) not in self.uncached_prefix_tokens}")
                 continue
 
             uncached_prefix_tokens = self.uncached_prefix_tokens[(url, request_id)]
             if uncached_prefix_tokens > 0:
                 prefill_periods.union(start_time, self.first_token_time[(url, request_id)])
                 all_uncached_prefix_tokens += uncached_prefix_tokens
-            #print(f"[[[[[[[[[[[[[[[[[[[[[[[[[ all_uncached_prefix_tokens:{all_uncached_prefix_tokens}")
 
         length = prefill_periods.compute_length()
-        #print(f"all_uncached_prefix_tokens:{all_uncached_prefix_tokens} prefill_periods length:{length}")
         if length > 0:
             return all_uncached_prefix_tokens / length
         return -1