@@ -188,12 +188,6 @@ async def send_batch(self, batch: List[Tuple[str, _BatchRequestModel]], server_u
188188 timeout = self ._timeout_inference_request ,
189189 headers = headers ,
190190 ) as response :
191- # resetting the server status so other requests can be
192- # scheduled on this node
193- if server_url in self ._server_status :
194- # TODO - if the server returns an error, track that so
195- # we don't send more requests to it
196- self ._server_status [server_url ] = True
197191 if response .status == 408 :
198192 raise HTTPException (408 , "Request timed out" )
199193 response .raise_for_status ()
@@ -207,7 +201,12 @@ async def send_batch(self, batch: List[Tuple[str, _BatchRequestModel]], server_u
207201 result = {request [0 ]: ex for request in batch }
208202 self ._responses .update (result )
209203 finally :
210- self ._server_status [server_url ] = True
204+ # resetting the server status so other requests can be
205+ # scheduled on this node
206+ if server_url in self ._server_status :
207+ # TODO - if the server returns an error, track that so
208+ # we don't send more requests to it
209+ self ._server_status [server_url ] = True
211210
212211 def _find_free_server (self ) -> Optional [str ]:
213212 existing = set (self ._server_status .keys ())
0 commit comments