@@ -114,27 +114,26 @@ async def run_live(
     async with llm.connect(llm_request) as llm_connection:
       if llm_request.contents:
         # Sends the conversation history to the model.
-        # with tracer.start_as_current_span('send_data'):
-
-        if invocation_context.transcription_cache:
-          from . import audio_transcriber
-
-          audio_transcriber = audio_transcriber.AudioTranscriber(
-              init_client=True
-              if invocation_context.run_config.input_audio_transcription
-              is None
-              else False
-          )
-          contents = audio_transcriber.transcribe_file(invocation_context)
-          logger.debug('Sending history to model: %s', contents)
-          await llm_connection.send_history(contents)
-          invocation_context.transcription_cache = None
-          trace_send_data(invocation_context, event_id, contents)
-        else:
-          await llm_connection.send_history(llm_request.contents)
-          trace_send_data(
-              invocation_context, event_id, llm_request.contents
-          )
+        with tracer.start_as_current_span('send_data'):
+          if invocation_context.transcription_cache:
+            from . import audio_transcriber
+
+            audio_transcriber = audio_transcriber.AudioTranscriber(
+                init_client=True
+                if invocation_context.run_config.input_audio_transcription
+                is None
+                else False
+            )
+            contents = audio_transcriber.transcribe_file(invocation_context)
+            logger.debug('Sending history to model: %s', contents)
+            await llm_connection.send_history(contents)
+            invocation_context.transcription_cache = None
+            trace_send_data(invocation_context, event_id, contents)
+          else:
+            await llm_connection.send_history(llm_request.contents)
+            trace_send_data(
+                invocation_context, event_id, llm_request.contents
+            )
 
       send_task = asyncio.create_task(
           self._send_to_model(llm_connection, invocation_context)
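
Both hunks in this diff promote a previously commented-out tracing span to live code, so they rely on a module-level tracer object. As a point of reference, a minimal sketch of how such a tracer is usually obtained with the OpenTelemetry API (the instrumentation name below is an assumption, not taken from this file):

# Sketch only: module-level tracer via the OpenTelemetry API. Without an SDK
# configured, get_tracer() returns a no-op tracer, so the new spans add
# essentially no overhead in un-instrumented deployments.
from opentelemetry import trace

tracer = trace.get_tracer(__name__)

# start_as_current_span() nests under whatever span is already active, so the
# 'send_data' span above becomes a child of the surrounding invocation span.
with tracer.start_as_current_span('send_data'):
  ...  # e.g. send the conversation history to the model
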
@@ -641,65 +640,65 @@ async def _call_llm_async(
     llm = self.__get_llm(invocation_context)
 
     async def _call_llm_with_tracing() -> AsyncGenerator[LlmResponse, None]:
-      # with tracer.start_as_current_span('call_llm'):
-      if invocation_context.run_config.support_cfc:
-        invocation_context.live_request_queue = LiveRequestQueue()
-        responses_generator = self.run_live(invocation_context)
-        async with Aclosing(
-            self._run_and_handle_error(
-                responses_generator,
-                invocation_context,
-                llm_request,
-                model_response_event,
-            )
-        ) as agen:
-          async for llm_response in agen:
-            # Runs after_model_callback if it exists.
-            if altered_llm_response := await self._handle_after_model_callback(
-                invocation_context, llm_response, model_response_event
-            ):
-              llm_response = altered_llm_response
-            # only yield partial response in SSE streaming mode
-            if (
-                invocation_context.run_config.streaming_mode
-                == StreamingMode.SSE
-                or not llm_response.partial
-            ):
-              yield llm_response
-            if llm_response.turn_complete:
-              invocation_context.live_request_queue.close()
-      else:
-        # Check if we can make this llm call or not. If the current call
-        # pushes the counter beyond the max set value, then the execution is
-        # stopped right here, and exception is thrown.
-        invocation_context.increment_llm_call_count()
-        responses_generator = llm.generate_content_async(
-            llm_request,
-            stream=invocation_context.run_config.streaming_mode
-            == StreamingMode.SSE,
-        )
-        async with Aclosing(
-            self._run_and_handle_error(
-                responses_generator,
-                invocation_context,
-                llm_request,
-                model_response_event,
-            )
-        ) as agen:
-          async for llm_response in agen:
-            trace_call_llm(
-                invocation_context,
-                model_response_event.id,
-                llm_request,
-                llm_response,
-            )
-            # Runs after_model_callback if it exists.
-            if altered_llm_response := await self._handle_after_model_callback(
-                invocation_context, llm_response, model_response_event
-            ):
-              llm_response = altered_llm_response
+      with tracer.start_as_current_span('call_llm'):
+        if invocation_context.run_config.support_cfc:
+          invocation_context.live_request_queue = LiveRequestQueue()
+          responses_generator = self.run_live(invocation_context)
+          async with Aclosing(
+              self._run_and_handle_error(
+                  responses_generator,
+                  invocation_context,
+                  llm_request,
+                  model_response_event,
+              )
+          ) as agen:
+            async for llm_response in agen:
+              # Runs after_model_callback if it exists.
+              if altered_llm_response := await self._handle_after_model_callback(
+                  invocation_context, llm_response, model_response_event
+              ):
+                llm_response = altered_llm_response
+              # only yield partial response in SSE streaming mode
+              if (
+                  invocation_context.run_config.streaming_mode
+                  == StreamingMode.SSE
+                  or not llm_response.partial
+              ):
+                yield llm_response
+              if llm_response.turn_complete:
+                invocation_context.live_request_queue.close()
+        else:
+          # Check if we can make this llm call or not. If the current call
+          # pushes the counter beyond the max set value, then the execution is
+          # stopped right here, and exception is thrown.
+          invocation_context.increment_llm_call_count()
+          responses_generator = llm.generate_content_async(
+              llm_request,
+              stream=invocation_context.run_config.streaming_mode
+              == StreamingMode.SSE,
+          )
+          async with Aclosing(
+              self._run_and_handle_error(
+                  responses_generator,
+                  invocation_context,
+                  llm_request,
+                  model_response_event,
+              )
+          ) as agen:
+            async for llm_response in agen:
+              trace_call_llm(
+                  invocation_context,
+                  model_response_event.id,
+                  llm_request,
+                  llm_response,
+              )
+              # Runs after_model_callback if it exists.
+              if altered_llm_response := await self._handle_after_model_callback(
+                  invocation_context, llm_response, model_response_event
+              ):
+                llm_response = altered_llm_response
 
-            yield llm_response
+              yield llm_response
 
     async with Aclosing(_call_llm_with_tracing()) as agen:
       async for event in agen:
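
One way (an assumption, not part of this change) to confirm that the restored 'send_data' and 'call_llm' spans are emitted during a local run is to install a console exporter before invoking the agent:

# Sketch only: route spans to stdout with the OpenTelemetry SDK so the spans
# added in this diff show up in the terminal. Production setups would more
# likely use an OTLP exporter pointed at a collector.
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)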