@@ -166,7 +166,7 @@ def decorated_method():
166166
167167 if is_stream :
168168 return self ._stream_with_fallback (
169- decorated_method , method_name , * args , ** kwargs
169+ decorated_method , method_name , decorators , * args , ** kwargs
170170 )
171171
172172 try :
@@ -187,14 +187,27 @@ def decorated_method():
187187 f"{ fallback .model_id } . Error: { str (e )} "
188188 )
189189
190- fallback_method = getattr (
191- fallback , method_name .replace ("_raw_" , "" )
192- )
190+ # Apply decorators to fallback's raw method directly — calling
191+ # fallback.gen() would re-enter the orchestrator and recurse via
192+ # fallback.fallback_llm.
193+ fallback_method = getattr (fallback , method_name )
194+ for decorator in decorators :
195+ fallback_method = decorator (fallback_method )
193196 fallback_kwargs = {** kwargs , "model" : fallback .model_id }
194- return fallback_method (* args , ** fallback_kwargs )
197+ try :
198+ return fallback_method (fallback , * args , ** fallback_kwargs )
199+ except Exception as e2 :
200+ if self ._is_non_retriable_client_error (e2 ):
201+ logger .error (
202+ f"Fallback LLM failed with non-retriable client "
203+ f"error; giving up: { str (e2 )} "
204+ )
205+ else :
206+ logger .error (f"Fallback LLM also failed; giving up: { str (e2 )} " )
207+ raise
195208
196209 def _stream_with_fallback (
197- self , decorated_method , method_name , * args , ** kwargs
210+ self , decorated_method , method_name , decorators , * args , ** kwargs
198211 ):
199212 """
200213 Wrapper generator that catches mid-stream errors and falls back.
@@ -223,11 +236,37 @@ def _stream_with_fallback(
223236 f"Primary LLM failed mid-stream. Falling back to "
224237 f"{ fallback .model_id } . Error: { str (e )} "
225238 )
226- fallback_method = getattr (
227- fallback , method_name .replace ("_raw_" , "" )
239+ # Apply decorators to fallback's raw stream method directly —
240+ # calling fallback.gen_stream() would re-enter the orchestrator
241+ # and recurse via fallback.fallback_llm. Emit the stream-start
242+ # event manually so dashboards still see the fallback's
243+ # provider/model when the response actually comes from it.
244+ fallback ._emit_stream_start_log (
245+ fallback .model_id ,
246+ kwargs .get ("messages" ),
247+ kwargs .get ("tools" ),
248+ bool (
249+ kwargs .get ("_usage_attachments" )
250+ or kwargs .get ("attachments" )
251+ ),
228252 )
253+ fallback_method = getattr (fallback , method_name )
254+ for decorator in decorators :
255+ fallback_method = decorator (fallback_method )
229256 fallback_kwargs = {** kwargs , "model" : fallback .model_id }
230- yield from fallback_method (* args , ** fallback_kwargs )
257+ try :
258+ yield from fallback_method (fallback , * args , ** fallback_kwargs )
259+ except Exception as e2 :
260+ if self ._is_non_retriable_client_error (e2 ):
261+ logger .error (
262+ f"Fallback LLM failed mid-stream with non-retriable "
263+ f"client error; giving up: { str (e2 )} "
264+ )
265+ else :
266+ logger .error (
267+ f"Fallback LLM also failed mid-stream; giving up: { str (e2 )} "
268+ )
269+ raise
231270
232271 def gen (self , model , messages , stream = False , tools = None , * args , ** kwargs ):
233272 decorators = [gen_token_usage , gen_cache ]
@@ -242,22 +281,29 @@ def gen(self, model, messages, stream=False, tools=None, *args, **kwargs):
242281 ** kwargs ,
243282 )
244283
245- def gen_stream (self , model , messages , stream = True , tools = None , * args , ** kwargs ):
246- # Attachments arrive as ``_usage_attachments`` from ``Agent._llm_gen``;
247- # the ``stream_token_usage`` decorator pops that key, but the log
248- # fires before the decorator runs so it's still in ``kwargs`` here.
284+ def _emit_stream_start_log (self , model , messages , tools , has_attachments ):
285+ # Stamped with ``self.provider_name`` so dashboards can group calls
286+ # by vendor; the fallback path emits its own copy on the fallback
287+ # instance so the actual responding provider is recorded.
249288 logging .info (
250289 "llm_stream_start" ,
251290 extra = {
252291 "model" : model ,
253292 "provider" : self .provider_name ,
254293 "message_count" : len (messages ) if messages is not None else 0 ,
255- "has_attachments" : bool (
256- kwargs .get ("_usage_attachments" ) or kwargs .get ("attachments" )
257- ),
294+ "has_attachments" : bool (has_attachments ),
258295 "has_tools" : bool (tools ),
259296 },
260297 )
298+
299+ def gen_stream (self , model , messages , stream = True , tools = None , * args , ** kwargs ):
300+ # Attachments arrive as ``_usage_attachments`` from ``Agent._llm_gen``;
301+ # the ``stream_token_usage`` decorator pops that key, but the log
302+ # fires before the decorator runs so it's still in ``kwargs`` here.
303+ has_attachments = bool (
304+ kwargs .get ("_usage_attachments" ) or kwargs .get ("attachments" )
305+ )
306+ self ._emit_stream_start_log (model , messages , tools , has_attachments )
261307 decorators = [stream_cache , stream_token_usage ]
262308 return self ._execute_with_fallback (
263309 "_raw_gen_stream" ,
0 commit comments