@@ -188,12 +188,6 @@ async def send_batch(self, batch: List[Tuple[str, _BatchRequestModel]], server_u
188
188
timeout = self ._timeout_inference_request ,
189
189
headers = headers ,
190
190
) as response :
191
- # resetting the server status so other requests can be
192
- # scheduled on this node
193
- if server_url in self ._server_status :
194
- # TODO - if the server returns an error, track that so
195
- # we don't send more requests to it
196
- self ._server_status [server_url ] = True
197
191
if response .status == 408 :
198
192
raise HTTPException (408 , "Request timed out" )
199
193
response .raise_for_status ()
@@ -207,7 +201,12 @@ async def send_batch(self, batch: List[Tuple[str, _BatchRequestModel]], server_u
207
201
result = {request [0 ]: ex for request in batch }
208
202
self ._responses .update (result )
209
203
finally :
210
- self ._server_status [server_url ] = True
204
+ # resetting the server status so other requests can be
205
+ # scheduled on this node
206
+ if server_url in self ._server_status :
207
+ # TODO - if the server returns an error, track that so
208
+ # we don't send more requests to it
209
+ self ._server_status [server_url ] = True
211
210
212
211
def _find_free_server (self ) -> Optional [str ]:
213
212
existing = set (self ._server_status .keys ())
0 commit comments