1- """Episode queue orchestration."""
1+ """Episode queue orchestration with retry tracking ."""
22
33from __future__ import annotations
44
55import asyncio
66import logging
7+ from datetime import UTC , datetime
8+ from typing import Any
79
810from . import state
911
1012logger = logging .getLogger (__name__ )
1113
14+ MAX_QUEUE_RETRIES = 3
15+ RETRY_BACKOFF_SECONDS = 0.5
+
+
+def _metadata(process_func: state.EpisodeProcessor) -> dict[str, Any]:
+    metadata = getattr(process_func, 'queue_metadata', {})
+    if not isinstance(metadata, dict):
+        metadata = {}
+    metadata.setdefault('attempts', 0)
+    return metadata
+
+
+def _record_failure(group_id: str, metadata: dict[str, Any], exc: Exception) -> None:
+    name = metadata.get('name', 'unknown')
+    attempts = metadata.get('attempts', 0)
+    failure = {
+        'name': str(name),
+        'error': f'{exc.__class__.__name__}: {exc}',
+        'attempts': str(attempts),
+        'timestamp': datetime.now(UTC).isoformat(),
+    }
+
+    failures = state.queue_failures.setdefault(group_id, [])
+    if name:
+        failures = [entry for entry in failures if entry.get('name') != name]
+    failures.append(failure)
+    state.queue_failures[group_id] = failures
+
+
+def _clear_failure(group_id: str, metadata: dict[str, Any]) -> None:
+    name = metadata.get('name')
+    if not name:
+        return
+    failures = state.queue_failures.get(group_id)
+    if not failures:
+        return
+    remaining = [entry for entry in failures if entry.get('name') != name]
+    if remaining:
+        state.queue_failures[group_id] = remaining
+    else:
+        state.queue_failures.pop(group_id, None)
+
 
 async def process_episode_queue(group_id: str) -> None:
     """Process episodes for a specific group_id sequentially."""
@@ -18,14 +62,33 @@ async def process_episode_queue(group_id: str) -> None:
     try:
         while True:
             process_func = await state.episode_queues[group_id].get()
+            metadata = _metadata(process_func)
+            name = metadata.get('name', 'queued-episode')
             try:
                 await process_func()
+                metadata['attempts'] = 0
+                _clear_failure(group_id, metadata)
             except Exception as exc:  # pragma: no cover - defensive logging
-                logger.error(
-                    'Error processing queued episode for group_id %s: %s',
+                metadata['attempts'] = metadata.get('attempts', 0) + 1
+                _record_failure(group_id, metadata, exc)
+                attempt = metadata['attempts']
+                logger.exception(
+                    "Error processing queued episode '%s' for group_id %s (attempt %s/%s)",
+                    name,
                     group_id,
-                    exc,
+                    attempt,
+                    MAX_QUEUE_RETRIES,
                 )
+                if attempt < MAX_QUEUE_RETRIES:
+                    setattr(process_func, 'queue_metadata', metadata)
+                    await asyncio.sleep(RETRY_BACKOFF_SECONDS)
+                    await state.episode_queues[group_id].put(process_func)
+                else:
+                    logger.error(
+                        "Episode '%s' for group_id %s exceeded max retries and will be discarded",
+                        name,
+                        group_id,
+                    )
             finally:
                 state.episode_queues[group_id].task_done()
     except asyncio.CancelledError:
@@ -46,6 +109,9 @@ async def enqueue_episode(group_id: str, process_func: state.EpisodeProcessor) -
     if group_id not in state.episode_queues:
         state.episode_queues[group_id] = asyncio.Queue()
 
+    metadata = _metadata(process_func)
+    setattr(process_func, 'queue_metadata', metadata)
+
     await state.episode_queues[group_id].put(process_func)
 
     if not state.queue_workers.get(group_id, False):
@@ -54,4 +120,4 @@ async def enqueue_episode(group_id: str, process_func: state.EpisodeProcessor) -
     return state.episode_queues[group_id].qsize()
 
 
-__all__ = ['enqueue_episode', 'process_episode_queue']
+__all__ = ['MAX_QUEUE_RETRIES', 'enqueue_episode', 'process_episode_queue']
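
For reference, a minimal usage sketch of the metadata convention this change introduces. The import paths and the episode body below are assumptions; only `enqueue_episode`, the `queue_metadata` attribute, and `state.episode_queues` come from the diff. Tagging the processor with a `name` lets `_record_failure` key entries in `state.queue_failures` per episode; an untagged processor falls back to the `_metadata` defaults and is logged as 'queued-episode'.

```python
import asyncio

# Hypothetical import paths; adjust to wherever this module and `state` live.
from app.queue import enqueue_episode
from app import state


async def main() -> None:
    async def process() -> None:
        ...  # build and persist the episode here

    # Optional: name the processor so retries and failure records
    # in state.queue_failures are keyed by this episode.
    process.queue_metadata = {'name': 'episode-42'}

    pending = await enqueue_episode('group-1', process)
    print(f'{pending} episode(s) pending for group-1')

    # task_done() is called for every get(), including retried puts,
    # so join() returns once the worker has fully drained the queue.
    await state.episode_queues['group-1'].join()


asyncio.run(main())
```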