@@ -292,7 +292,7 @@ async def _watchdog_task(self) -> None:
292292 may recreate Task objects on every poll, resetting started_at to the current
293293 time and making a task.started_at-based check unreliable.
294294 """
295- _log .warning ("Watchdog starting" )
295+ _log .info ("Watchdog starting" )
296296 # Maps task_id -> time the watchdog first observed the task in STARTED state.
297297 # Independent of task.started_at which may be reset by polling machinery.
298298 first_seen_started : dict [str , datetime .datetime ] = {}
@@ -304,7 +304,7 @@ async def _watchdog_task(self) -> None:
304304 all_statuses = {
305305 tid : t .task_status for tid , t in list (self .tasks .items ())
306306 }
307- _log .warning (
307+ _log .debug (
308308 f"Watchdog scan: { len (self .tasks )} tasks in memory, "
309309 f"statuses={ all_statuses } "
310310 )
@@ -325,15 +325,15 @@ async def _watchdog_task(self) -> None:
325325 # Remove tasks that are no longer STARTED (completed, failed, gone).
326326 for task_id in list (first_seen_started .keys ()):
327327 if task_id not in currently_started :
328- _log .warning (
328+ _log .debug (
329329 f"Watchdog: task { task_id } left STARTED, removing from tracking"
330330 )
331331 del first_seen_started [task_id ]
332332
333333 # Record first observation time for newly STARTED tasks.
334334 for task_id in currently_started :
335335 if task_id not in first_seen_started :
336- _log .warning (
336+ _log .debug (
337337 f"Watchdog: first observation of STARTED task { task_id } "
338338 )
339339 first_seen_started [task_id ] = now
@@ -345,7 +345,7 @@ async def _watchdog_task(self) -> None:
345345 if (now - first_seen ).total_seconds () > _WATCHDOG_GRACE_PERIOD
346346 ]
347347
348- _log .warning (
348+ _log .debug (
349349 f"Watchdog: { len (currently_started )} started, "
350350 f"{ len (first_seen_started )} tracked, "
351351 f"{ len (candidates )} past grace period"
@@ -355,7 +355,7 @@ async def _watchdog_task(self) -> None:
355355 key = f"{ self .config .heartbeat_key_prefix } :{ task_id } "
356356 alive = await self ._async_redis_conn .exists (key )
357357 age = (now - first_seen_started [task_id ]).total_seconds ()
358- _log .warning (
358+ _log .debug (
359359 f"Watchdog: checking task { task_id } "
360360 f"(age={ age :.0f} s), heartbeat key alive={ bool (alive )} "
361361 )
@@ -377,8 +377,8 @@ async def _watchdog_task(self) -> None:
377377 )
378378 # Remove from tracking so we don't re-publish if pub/sub is slow.
379379 del first_seen_started [task_id ]
380- except Exception as e :
381- _log .error ( f "Watchdog error: { e } " )
380+ except Exception :
381+ _log .exception ( "Watchdog error" )
382382
383383 async def process_queue (self ):
384384 # Create a pool of workers
0 commit comments