Skip to content

Commit 0e7f00c

Browse files
committed
Reduce log levels for watchdog
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
1 parent 96710e8 commit 0e7f00c

File tree

1 file changed: +8 -8 lines changed

1 file changed: +8 -8 lines changed

docling_jobkit/orchestrators/rq/orchestrator.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -292,7 +292,7 @@ async def _watchdog_task(self) -> None:
292292
may recreate Task objects on every poll, resetting started_at to the current
293293
time and making a task.started_at-based check unreliable.
294294
"""
295-
_log.warning("Watchdog starting")
295+
_log.info("Watchdog starting")
296296
# Maps task_id -> time the watchdog first observed the task in STARTED state.
297297
# Independent of task.started_at which may be reset by polling machinery.
298298
first_seen_started: dict[str, datetime.datetime] = {}
@@ -304,7 +304,7 @@ async def _watchdog_task(self) -> None:
304304
all_statuses = {
305305
tid: t.task_status for tid, t in list(self.tasks.items())
306306
}
307-
_log.warning(
307+
_log.debug(
308308
f"Watchdog scan: {len(self.tasks)} tasks in memory, "
309309
f"statuses={all_statuses}"
310310
)
@@ -325,15 +325,15 @@ async def _watchdog_task(self) -> None:
325325
# Remove tasks that are no longer STARTED (completed, failed, gone).
326326
for task_id in list(first_seen_started.keys()):
327327
if task_id not in currently_started:
328-
_log.warning(
328+
_log.debug(
329329
f"Watchdog: task {task_id} left STARTED, removing from tracking"
330330
)
331331
del first_seen_started[task_id]
332332

333333
# Record first observation time for newly STARTED tasks.
334334
for task_id in currently_started:
335335
if task_id not in first_seen_started:
336-
_log.warning(
336+
_log.debug(
337337
f"Watchdog: first observation of STARTED task {task_id}"
338338
)
339339
first_seen_started[task_id] = now
@@ -345,7 +345,7 @@ async def _watchdog_task(self) -> None:
345345
if (now - first_seen).total_seconds() > _WATCHDOG_GRACE_PERIOD
346346
]
347347

348-
_log.warning(
348+
_log.debug(
349349
f"Watchdog: {len(currently_started)} started, "
350350
f"{len(first_seen_started)} tracked, "
351351
f"{len(candidates)} past grace period"
@@ -355,7 +355,7 @@ async def _watchdog_task(self) -> None:
355355
key = f"{self.config.heartbeat_key_prefix}:{task_id}"
356356
alive = await self._async_redis_conn.exists(key)
357357
age = (now - first_seen_started[task_id]).total_seconds()
358-
_log.warning(
358+
_log.debug(
359359
f"Watchdog: checking task {task_id} "
360360
f"(age={age:.0f}s), heartbeat key alive={bool(alive)}"
361361
)
@@ -377,8 +377,8 @@ async def _watchdog_task(self) -> None:
377377
)
378378
# Remove from tracking so we don't re-publish if pub/sub is slow.
379379
del first_seen_started[task_id]
380-
except Exception as e:
381-
_log.error(f"Watchdog error: {e}")
380+
except Exception:
381+
_log.exception("Watchdog error")
382382

383383
async def process_queue(self):
384384
# Create a pool of workers

0 commit comments

Comments
 (0)