|
7 | 7 | from django.db import transaction |
8 | 8 | from django.utils.timezone import now, timedelta |
9 | 9 |
|
| 10 | +from ansible_base.lib.utils.db import get_pg_notify_params |
| 11 | + |
10 | 12 | from ansible_base.task.models import TASK_STATES, Task |
11 | 13 |
|
12 | 14 | logger = logging.getLogger(__name__) |
@@ -40,21 +42,31 @@ def run_task_from_queue(dispatcher): |
40 | 42 | @task(queue='dab_broadcast') |
41 | 43 | def manage_lost_tasks(grace_period: int = 10): |
42 | 44 | cutoff_time = now() - timedelta(minutes=grace_period) |
43 | | - ctl = Control('dab_broadcast') |
44 | 45 | for task in Task.objects.filter(state=TASK_STATES.RUNNING, started_at__lt=cutoff_time).iterator(): |
45 | | - running_tasks = ctl.control_with_reply('running', data={'uuid': task.wrapper_uuid}) |
46 | | - print(running_tasks) |
47 | | - print(task.wrapper_uuid) |
48 | | - |
49 | | -# TODO: Add a "reaper" fallback task, this is currently blocked on dispatcher issue |
50 | | -# https://github.com/ansible/dispatcher/issues/6 |
51 | | -# When run_task_from_queue starts the task, the uuid needs to be setup to be discoverable |
52 | | -# this fallback method will query for tasks that are older than a certain grace period |
53 | | -# later improvement: look for tasks older that the task timeout plus a grace period |
54 | | -# these tasks are in arrears |
55 | | -# For each task in arrears, we will: |
56 | | -# 1. obtain a row-level lock for that task |
57 | | -# 2. send a roll-call message looking for its uuid |
58 | | -# 3. if we get no answer to the roll-call, the task status is changed to lost |
59 | | -# 4. dependent on policies defined in settings, we may re-submit the task up to a retry count |
60 | | -# 5. if retry count is exhausted, task is failed |
| 46 | + psycopg_params = get_pg_notify_params() |
| 47 | + psycopg_params.pop('autocommit') # dispatcher automatically adds this, causes error, TODO: need pre-check |
| 48 | + psycopg_params.pop('cursor_factory') |
| 49 | + psycopg_params.pop('context') # TODO: remove in inner method, makes non-async, not good |
| 50 | + |
| 51 | + ctl = Control('dab_broadcast', config=psycopg_params) |
| 52 | + |
| 53 | + # TODO: row-level lock for the task |
| 54 | + |
| 55 | + running_tasks = ctl.control_with_reply('running', data={'uuid': str(task.wrapper_uuid)}) |
| 56 | + |
| 57 | + found = False |
| 58 | + for server_reply in running_tasks: |
| 59 | + for worker_id, task_data in server_reply: |
| 60 | + if task_data.get('uuid') == str(task.wrapper_uuid): |
| 61 | + found = True |
| 62 | + break |
| 63 | + if found: |
| 64 | + break |
| 65 | + |
| 66 | + if not found: |
| 67 | + # TODO: feature of retry policy |
| 68 | + logger.warning(f'Could not find task {task.name} {task.wrapper_uuid}, deleting entry') |
| 69 | + task.delete() |
| 70 | + else: |
| 71 | + delta = now() - task.started_at |
| 72 | + logger.info(f'Noticed {task.name} {task.wrapper_uuid} running for {delta} seconds, seems to be fine') |
0 commit comments