Skip to content

Commit f56c8fb

Browse files
committed
[IMP] queue_job: query orphaned dead jobs that do not exist in the lock table
1 parent 84a3fca commit f56c8fb

File tree

2 files changed

+21
-45
lines changed

2 files changed

+21
-45
lines changed

queue_job/jobrunner/runner.py

Lines changed: 20 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -357,13 +357,13 @@ def _query_requeue_dead_jobs(self):
357357
ELSE exc_info
358358
END)
359359
WHERE
360-
id in (
360+
id IN (
361361
SELECT
362362
queue_job_id
363363
FROM
364364
queue_job_lock
365365
WHERE
366-
queue_job_id in (
366+
queue_job_id IN (
367367
SELECT
368368
id
369369
FROM
@@ -375,34 +375,18 @@ def _query_requeue_dead_jobs(self):
375375
)
376376
FOR UPDATE SKIP LOCKED
377377
)
378-
RETURNING uuid
379-
"""
380-
381-
def _query_requeue_orphaned_jobs(self):
382-
"""Query to requeue jobs stuck in 'enqueued' state without a lock.
383-
384-
This handles the edge case where the runner marks a job as 'enqueued'
385-
but the HTTP request to start the job never reaches the Odoo server
386-
(e.g., due to server shutdown/crash between setting enqueued and
387-
the controller receiving the request). These jobs have no lock record
388-
because set_started() was never called, so they are invisible to
389-
_query_requeue_dead_jobs().
390-
"""
391-
return """
392-
UPDATE
393-
queue_job
394-
SET
395-
state='pending'
396-
WHERE
397-
state = 'enqueued'
398-
AND date_enqueued < (now() AT TIME ZONE 'utc' - INTERVAL '10 sec')
399-
AND NOT EXISTS (
400-
SELECT
401-
1
402-
FROM
403-
queue_job_lock
404-
WHERE
405-
queue_job_id = queue_job.id
378+
OR (
379+
state IN ('enqueued','started')
380+
AND date_enqueued <
381+
(now() AT TIME ZONE 'utc' - INTERVAL '10 sec')
382+
AND NOT EXISTS (
383+
SELECT
384+
1
385+
FROM
386+
queue_job_lock
387+
WHERE
388+
queue_job_lock.queue_job_id = queue_job.id
389+
)
406390
)
407391
RETURNING uuid
408392
"""
@@ -425,6 +409,12 @@ def requeue_dead_jobs(self):
425409
However, when the Odoo server crashes or is otherwise force-stopped,
426410
running jobs are interrupted while the runner has no chance to know
427411
they have been aborted.
412+
413+
This also handles orphaned jobs (enqueued but never started, no lock).
414+
This edge case occurs when the runner marks a job as 'enqueued'
415+
but the HTTP request to start the job never reaches the Odoo server
416+
(e.g., due to server shutdown/crash between setting enqueued and
417+
the controller receiving the request).
428418
"""
429419

430420
with closing(self.conn.cursor()) as cr:
@@ -435,14 +425,6 @@ def requeue_dead_jobs(self):
435425
for (uuid,) in cr.fetchall():
436426
_logger.warning("Re-queued dead job with uuid: %s", uuid)
437427

438-
# Requeue orphaned jobs (enqueued but never started, no lock)
439-
query = self._query_requeue_orphaned_jobs()
440-
cr.execute(query)
441-
for (uuid,) in cr.fetchall():
442-
_logger.warning(
443-
"Re-queued orphaned job (enqueued without lock) with uuid: %s", uuid
444-
)
445-
446428

447429
class QueueJobRunner:
448430
def __init__(

test_queue_job/tests/test_requeue_dead_job.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,14 +110,8 @@ def test_requeue_orphaned_jobs(self):
110110
job_obj.date_enqueued = datetime.now() - timedelta(minutes=1)
111111
job_obj.store()
112112

113-
# job isn't actually picked up by the first requeue attempt
113+
# job is now picked up by the requeue query (which includes orphaned jobs)
114114
query = Database(self.env.cr.dbname)._query_requeue_dead_jobs()
115115
self.env.cr.execute(query)
116116
uuids_requeued = self.env.cr.fetchall()
117-
self.assertFalse(uuids_requeued)
118-
119-
# job is picked up by the 2nd requeue attempt
120-
query = Database(self.env.cr.dbname)._query_requeue_orphaned_jobs()
121-
self.env.cr.execute(query)
122-
uuids_requeued = self.env.cr.fetchall()
123117
self.assertTrue(queue_job.uuid in j[0] for j in uuids_requeued)

0 commit comments

Comments
 (0)