|
 
 # TODO: eliminate this module constant (should be part of some constructor interface)
 MAX_RETRIES = 50
-
+QUEUE_LIMIT_PER_BACKEND = 10
 
 # Sentinel value to indicate that a parameter was not set
 _UNSET = object()
@@ -539,19 +539,34 @@ def _job_update_loop( |
             stats["track_statuses"] += 1
 
         not_started = job_db.get_by_status(statuses=["not_started"], max=200).copy()
-        pending = job_db.get_by_status(statuses=["created", "queued", "queued_for_start"], max=200).copy()
+        queued = job_db.get_by_status(statuses=["queued"], max=200)
 
-        if len(not_started) > 0 and len(pending) < 10:
+        if len(not_started) > 0:
             # Check number of jobs running at each backend
             running = job_db.get_by_status(statuses=["running"])
             stats["job_db get_by_status"] += 1
-            per_backend = running.groupby("backend_name").size().to_dict()
-            _log.info(f"Running per backend: {per_backend}")
+            running_per_backend = running.groupby("backend_name").size().to_dict()
+            queued_per_backend = queued.groupby("backend_name").size().to_dict()
+            _log.info(f"Running per backend: {running_per_backend}")
+            _log.info(f"Queued per backend: {queued_per_backend}")
+
             total_added = 0
             for backend_name in self.backends:
-                backend_load = per_backend.get(backend_name, 0)
-                if backend_load < self.backends[backend_name].parallel_jobs:
-                    to_add = self.backends[backend_name].parallel_jobs - backend_load
+                backend_running = running_per_backend.get(backend_name, 0)
+                backend_queued = queued_per_backend.get(backend_name, 0)
+
+                # Capacity check per backend (at most QUEUE_LIMIT_PER_BACKEND queued jobs per user/backend)
+                backend_capacity = self.backends[backend_name].parallel_jobs
+                has_capacity = backend_running < backend_capacity
+                under_queued_limit = backend_queued < QUEUE_LIMIT_PER_BACKEND
+
+                if has_capacity and under_queued_limit:
+
+                    # Calculate the number of jobs we can add, also based on the remaining queue space
+                    available_slots = max(0, backend_capacity - backend_running)
+                    remaining_queue_space = max(0, QUEUE_LIMIT_PER_BACKEND - backend_queued)
+                    to_add = min(available_slots, remaining_queue_space)
+
                     for i in not_started.index[total_added : total_added + to_add]:
                         self._launch_job(start_job, df=not_started, i=i, backend_name=backend_name, stats=stats)
                         stats["job launch"] += 1
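For reference, the throttling rule introduced in this hunk reduces to launching at most `min(free parallel-job slots, remaining queue space)` jobs per backend. Below is a minimal sketch of that calculation, assuming a hypothetical standalone helper `compute_to_add` that takes plain integer counts in place of the `job_db` dataframes:

```python
QUEUE_LIMIT_PER_BACKEND = 10


def compute_to_add(running: int, queued: int, capacity: int, queue_limit: int = QUEUE_LIMIT_PER_BACKEND) -> int:
    """Sketch: how many not-started jobs may be launched on one backend right now."""
    available_slots = max(0, capacity - running)          # free parallel-job slots
    remaining_queue_space = max(0, queue_limit - queued)  # room under the per-backend queue cap
    return min(available_slots, remaining_queue_space)


# Example: capacity 5, 2 running, 9 already queued -> only 1 more job may be launched.
assert compute_to_add(running=2, queued=9, capacity=5) == 1
# Example: queue already at the limit -> nothing is launched even if slots are free.
assert compute_to_add(running=0, queued=10, capacity=5) == 0
```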
|