@@ -188,22 +188,26 @@ def wait_for_model_loading(self, model_identifier: str, max_attempts: int = 10,
188
188
return False
189
189
return False # Timed out without reaching LOADED state
190
190
191
- @retry (stop = stop_after_attempt (5 ), wait = wait_fixed (60 ), retry = retry_if_result (lambda x : not x ))
192
191
def load_model(self, model_identifier: str) -> bool:
    """
    Ensure the given model is loaded, unloading all models at most once.

    Args:
        model_identifier: The model to load.

    Returns:
        bool: True if the model's status is already LOADED. False if
        unloading all models fails. Otherwise, whatever
        ``_load_model_with_retries`` returns for this model.
    """
    # Fast path: when the model already reports LOADED there is nothing
    # to unload or load.
    if self.check_model_status(model_identifier) == ModelStatusEnum.LOADED:
        return True

    # The unload is performed here, outside the retry-decorated helper,
    # so it runs once per call to this method.
    if self.unload_all_models():
        # NOTE(review): the helper carries a retry decorator; if `retry`
        # is tenacity's, exhausting every attempt raises RetryError
        # instead of returning False -- confirm callers expect that.
        return self._load_model_with_retries(model_identifier)

    return False
206
+
207
+ @retry (stop = stop_after_attempt (5 ), wait = wait_fixed (30 ), retry = retry_if_result (lambda x : not x ))
208
+ def _load_model_with_retries (self , model_identifier : str ) -> bool :
209
+ """Internal method that handles retries for loading a model without unloading first."""
210
+ # Check current status
207
211
status = self .check_model_status (model_identifier )
208
212
209
213
# If already loaded, we're done
@@ -216,7 +220,7 @@ def load_model(self, model_identifier: str) -> bool:
216
220
if not load_request_success :
217
221
return False
218
222
219
- # Now wait for loading to complete
223
+ # Wait for loading to complete
220
224
return self .wait_for_model_loading (model_identifier )
221
225
222
226
return False
0 commit comments