Skip to content

Commit bde25db

Browse files
committed
Fallback logic
1 parent 310648f commit bde25db

File tree

1 file changed

+29
-17
lines changed

1 file changed

+29
-17
lines changed

ansible_base/task/tasks.py

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from django.db import transaction
88
from django.utils.timezone import now, timedelta
99

10+
from ansible_base.lib.utils.db import get_pg_notify_params
11+
1012
from ansible_base.task.models import TASK_STATES, Task
1113

1214
logger = logging.getLogger(__name__)
@@ -40,21 +42,31 @@ def run_task_from_queue(dispatcher):
4042
@task(queue='dab_broadcast')
4143
def manage_lost_tasks(grace_period: int = 10):
4244
cutoff_time = now() - timedelta(minutes=grace_period)
43-
ctl = Control('dab_broadcast')
4445
for task in Task.objects.filter(state=TASK_STATES.RUNNING, started_at__lt=cutoff_time).iterator():
45-
running_tasks = ctl.control_with_reply('running', data={'uuid': task.wrapper_uuid})
46-
print(running_tasks)
47-
print(task.wrapper_uuid)
48-
49-
# TODO: Add a "reaper" fallback task, this is currently blocked on dispatcher issue
50-
# https://github.com/ansible/dispatcher/issues/6
51-
# When run_task_from_queue starts the task, the uuid needs to be setup to be discoverable
52-
# this fallback method will query for tasks that are older than a certain grace period
53-
# later improvement: look for tasks older that the task timeout plus a grace period
54-
# these tasks are in arrears
55-
# For each task in arrears, we will:
56-
# 1. obtain a row-level lock for that task
57-
# 2. send a roll-call message looking for its uuid
58-
# 3. if we get no answer to the roll-call, the task status is changed to lost
59-
# 4. dependent on policies defined in settings, we may re-submit the task up to a retry count
60-
# 5. if retry count is exhausted, task is failed
46+
psycopg_params = get_pg_notify_params()
47+
psycopg_params.pop('autocommit') # dispatcher automatically adds this, causes error, TODO: need pre-check
48+
psycopg_params.pop('cursor_factory')
49+
psycopg_params.pop('context') # TODO: remove in inner method, makes non-async, not good
50+
51+
ctl = Control('dab_broadcast', config=psycopg_params)
52+
53+
# TODO: row-level lock for the task
54+
55+
running_tasks = ctl.control_with_reply('running', data={'uuid': str(task.wrapper_uuid)})
56+
57+
found = False
58+
for server_reply in running_tasks:
59+
for worker_id, task_data in server_reply:
60+
if task_data.get('uuid') == str(task.wrapper_uuid):
61+
found = True
62+
break
63+
if found:
64+
break
65+
66+
if not found:
67+
# TODO: feature of retry policy
68+
logger.warning(f'Could not find task {task.name} {task.wrapper_uuid}, deleting entry')
69+
task.delete()
70+
else:
71+
delta = now() - task.started_at
72+
logger.info(f'Noticed {task.name} {task.wrapper_uuid} running for {delta} seconds, seems to be fine')

0 commit comments

Comments
 (0)