1 file changed: tensorrt_llm/_torch/pyexecutor (+9 −0 lines changed)

@@ -236,6 +236,8 @@ def __init__(self,
         self.ctx_in_transmission_requests = []
         self.previous_batch: Optional[BatchState] = None
         self.num_scheduled_requests: int = 0
+        self.benchmark_req_queues_size = int(
+            os.environ.get("TLLM_BENCHMARK_REQ_QUEUES_SIZE", 0))
 
         # list of requests in each PP micro batch
         self.num_micro_batches = self.dist.pp_size
@@ -996,6 +998,13 @@ def _prepare_draft_requests(self):
 
     def _executor_loop_overlap(self):
         torch.cuda.set_device(self.device_id)
+        if self.dist.rank == 0 and not self.is_warmup and self.benchmark_req_queues_size > 0 and self.kv_cache_transceiver:
+            while self.request_queue.qsize() < self.benchmark_req_queues_size:
+                logger.info(
+                    f"sleep 5 seconds, num_request_queue: {self.request_queue.qsize()}"
+                )
+                time.sleep(5)
+
         with self._profiler() as profile_step:
            iter_start_time = time.time()
            iter_stats = None
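
A minimal usage sketch of the gate added above, assuming the environment variable is read at executor construction as shown in the diff; the queue-size value of 64 is illustrative only:

    import os

    # Illustrative value only: hold the overlap executor loop on rank 0 until
    # at least 64 requests are queued (per the diff, the gate also requires a
    # kv_cache_transceiver to be configured and is skipped during warmup).
    os.environ["TLLM_BENCHMARK_REQ_QUEUES_SIZE"] = "64"

    # Leaving the variable unset (or set to "0") disables the wait entirely,
    # since the default parsed in __init__ is 0.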