Skip to content

Commit 11d0af3

Browse files
authored
Merge pull request #1198 from transformerlab/fix/remote-cancel-run-jobs
Don't cancel running REMOTE jobs when the API is restarted
2 parents 5e044db + f6f6f59 commit 11d0af3

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

api/transformerlab/services/experiment_init.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -362,7 +362,8 @@ def seed_default_experiments():
362362

363363

364364
def cancel_in_progress_jobs():
365-
"""On startup, mark any RUNNING jobs as CANCELLED in the filesystem job store across all organizations."""
365+
"""On startup, mark any RUNNING jobs as CANCELLED in the filesystem job store across all organizations.
366+
REMOTE jobs are excluded from this cancellation as they run on external compute providers."""
366367
# Get HOME_DIR
367368
try:
368369
home_dir = HOME_DIR
@@ -392,8 +393,14 @@ def cancel_in_progress_jobs():
392393
job_id = entry_path.rstrip("/").split("/")[-1]
393394
job = Job.get(job_id)
394395
if job.get_status() == "RUNNING":
395-
job.update_status("CANCELLED")
396-
print(f"Cancelled running job: {job_id} (org: {org_id})")
396+
# Skip REMOTE jobs - they should not be cancelled on startup
397+
job_data = job.get_json_data(uncached=True)
398+
job_type = job_data.get("type", "")
399+
if job_type == "REMOTE":
400+
print(f"Skipping REMOTE job: {job_id} (org: {org_id})")
401+
else:
402+
job.update_status("CANCELLED")
403+
print(f"Cancelled running job: {job_id} (org: {org_id})")
397404
except Exception:
398405
# If we can't access the job, continue to the next one
399406
pass

0 commit comments

Comments
 (0)