@@ -73,12 +73,20 @@ def check_health(self):
7373 """Polling health check route of the API."""
7474
def health_check() -> bool:
    """Probe the API's "ok" route once and report whether it is healthy.

    Returns:
        True when the response's "status" field equals "ok". False for any
        other response, or when the request raises; the log messages
        indicate the caller is expected to retry in both cases.
    """
    try:
        response = self.get("ok")
        if response.get("status") == "ok":
            logger.info("API health check passed.")
            return True
        logger.info(
            f"Unexpected API health check response: {response}. Retrying..."
        )
        return False
    except Exception as e:
        # Deliberately broad: the API server might not be ready yet after the
        # JetStream2 instance has been unshelved, so any failure is reported
        # as "not healthy yet" instead of aborting. The exception is included
        # in the message so repeated failures can be diagnosed from the log.
        logger.error(f"Exception during health_check: {e!r}. Retrying...")
        return False
8290
8391 logger .info (f"Checking SpamCheckAPI..." )
8492 if not _poll_status (
@@ -224,7 +232,10 @@ def _poll_status(func: Callable[[], bool], interval: int, timeout: int) -> bool:
224232
225233def _run_command (options ):
226234 dry_run = options ["skip_changing_instance_state" ]
227- logger .warning (f"dry_run is { dry_run } " )
235+ skip_shelving_when_done = options ["skip_shelving_when_done" ]
236+
237+ logger .debug (f"dry_run is { dry_run } " )
238+ logger .debug (f"skip_shelving_when_done is { skip_shelving_when_done } " )
228239
229240 _setup_environment ()
230241 conn = openstack .connect (cloud = "envvars" )
@@ -236,12 +247,12 @@ def _run_command(options):
236247 settings .LLM_SPAM_CHECK_API_URL , settings .LLM_SPAM_CHECK_API_KEY
237248 )
238249
239- if not dry_run :
240- jetstream_instance_manager .ensure_active ()
241- else :
250+ if dry_run :
242251 logger .info (
243252 f"Mocking server state to be { Config .JETSTREAM_SERVER_STATUS_ACTIVE } "
244253 )
254+ else :
255+ jetstream_instance_manager .ensure_active ()
245256
246257 try :
247258 spam_check_api_client .check_health ()
@@ -250,10 +261,11 @@ def _run_command(options):
250261 except Exception as e :
251262 logger .error (f"Communication with SpamCheckAPI failed. { e } " )
252263 finally :
253- if not dry_run :
254- jetstream_instance_manager .shelve ()
255- else :
256- logger .info ("Mocking shelving server" )
264+ if not skip_shelving_when_done :
265+ if dry_run :
266+ logger .info ("Mocking shelving server" )
267+ else :
268+ jetstream_instance_manager .shelve ()
257269
258270
259271class Command (BaseCommand ):
@@ -263,7 +275,12 @@ def add_arguments(self, parser):
263275 parser .add_argument (
264276 "--skip-changing-instance-state" ,
265277 action = "store_true" ,
266- help = "Run the command without changing the state of the JetStream2 instance" ,
278+ help = "Run the command without changing the state of the JetStream2 instance." ,
279+ )
280+ parser .add_argument (
281+ "--skip-shelving-when-done" ,
282+ action = "store_true" ,
283+ help = "Run the command without shelving the JetStream2 instance after the workflow has completed." ,
267284 )
268285
269286 def handle (self , * args , ** options ):
0 commit comments