@@ -112,7 +112,6 @@ def __init__(self, backend: Backend):
112112 async def resolve (
113113 self ,
114114 request : GenerationRequest ,
115- start_time : float ,
116115 timeout_time : float ,
117116 ) -> ResponseSummary :
118117 """
@@ -121,7 +120,6 @@ async def resolve(
121120 and handles any errors that may occur during the process.
122121
123122 :param request: The request to resolve.
124- :param start_time: The time to start the request.
125123 :param timeout_time: The time.time() deadline at which the request times out.
126124 If timeout_time is math.inf, the request will not timeout.
127125 :return: A ResponseSummary object containing the response from the backend.
@@ -140,10 +138,6 @@ async def _runner():
140138 nonlocal response
141139 response = resp
142140
143- if (wait_time := start_time - time .time ()) > 0 :
144- await asyncio .sleep (wait_time )
145-
146- start_time = time .time ()
147141 await asyncio .wait_for (
148142 _runner (),
149143 timeout = timeout_time - time .time () if timeout_time < math .inf else None ,
@@ -164,7 +158,7 @@ async def _runner():
164158 except Exception as exc : # noqa: BLE001
165159 error = str (exc )
166160
167- return self ._handle_response (request , response , error , start_time )
161+ return self ._handle_response (request , response , error )
168162
169163 def _create_request_func_kwargs (
170164 self ,
@@ -208,7 +202,6 @@ def _handle_response(
208202 request : GenerationRequest ,
209203 response : Any ,
210204 error : Optional [str ],
211- start_time : float ,
212205 ) -> ResponseSummary :
213206 if response is None or not isinstance (
214207 response , (ResponseSummary , StreamingTextResponse )
@@ -228,8 +221,8 @@ def _handle_response(
228221 headers = {},
229222 payload = {},
230223 ),
231- start_time = start_time ,
232- end_time = time . time () ,
224+ start_time = None ,
225+ end_time = None ,
233226 request_id = request .request_id ,
234227 error = error or "Unknown error" ,
235228 )
@@ -243,7 +236,7 @@ def _handle_response(
243236 payload = {},
244237 ),
245238 start_time = response .start_time ,
246- end_time = time . time () ,
239+ end_time = None ,
247240 request_prompt_tokens = request .stats .get ("prompt_tokens" , None ),
248241 request_output_tokens = None ,
249242 response_prompt_tokens = None ,
0 commit comments