Commit 3b997a0

hangfei authored and copybara-github committed
fix: change LlmResponse to use Content for transcriptions
The transcription change breaks multi-agent transfer during live/bidi, and the transcription feature is not fully ready to be used yet, so this rolls that change back. Updates `GeminiLlmConnection` to populate the `content` field of `LlmResponse` with `types.Content` and `types.Part` objects for both input and output transcriptions, instead of using the dedicated transcription fields. Also removes a debug print from `audio_cache_manager.py`.

PiperOrigin-RevId: 799851950
1 parent bcf0dda · commit 3b997a0
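In effect, an input transcription now arrives as a user-role `Content` on `llm_response.content`, and an output transcription arrives as partial model-role `Content`, instead of through dedicated `input_transcription`/`output_transcription` fields. A minimal consumer sketch under that assumption (the on_* handlers are hypothetical placeholders, not part of the ADK API):

# Hypothetical consumer of GeminiLlmConnection.receive(); the two
# on_* handlers are placeholders, not ADK APIs.
async def consume(connection):
  async for llm_response in connection.receive():
    content = llm_response.content
    if not content or not content.parts or not content.parts[0].text:
      continue
    if content.role == 'user':
      # Transcription of the user's input audio.
      on_user_transcript(content.parts[0].text)
    elif content.role == 'model' and llm_response.partial:
      # One streamed chunk of the model's output transcription.
      on_model_transcript_chunk(content.parts[0].text)

Note that ordinary partial model text can arrive through the same field, so a caller that must tell the two apart has to rely on surrounding control signals rather than the response shape alone.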

File tree

2 files changed: +13 -3 lines

src/google/adk/flows/llm_flows/audio_cache_manager.py
Lines changed: 0 additions & 1 deletion

@@ -141,7 +141,6 @@ async def _flush_cache_to_services(
     Returns:
       True if the cache was successfully flushed, False otherwise.
     """
-    print('flush cache')
     if not invocation_context.artifact_service or not audio_cache:
       logger.debug('Skipping cache flush: no artifact service or empty cache')
       return False

src/google/adk/models/gemini_llm_connection.py
Lines changed: 13 additions & 2 deletions

@@ -164,8 +164,14 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
             message.server_content.input_transcription
             and message.server_content.input_transcription.text
         ):
+          user_text = message.server_content.input_transcription.text
+          parts = [
+              types.Part.from_text(
+                  text=user_text,
+              )
+          ]
           llm_response = LlmResponse(
-              input_transcription=message.server_content.input_transcription,
+              content=types.Content(role='user', parts=parts)
           )
           yield llm_response
         if (
@@ -180,8 +186,13 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
           # We rely on other control signals to determine when to yield the
           # full text response(turn_complete, interrupted, or tool_call).
           text += message.server_content.output_transcription.text
+          parts = [
+              types.Part.from_text(
+                  text=message.server_content.output_transcription.text
+              )
+          ]
           llm_response = LlmResponse(
-              output_transcription=message.server_content.output_transcription
+              content=types.Content(role='model', parts=parts), partial=True
           )
           yield llm_response
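The `partial=True` flag matters on the output path: transcription text streams in chunks while the local `text` variable accumulates the running total, and control signals (turn_complete, interrupted, or tool_call) mark the end of the turn. A sketch of stitching those chunks back together on the consumer side, again with hypothetical names:

# Sketch: accumulate partial model-role transcription chunks into one
# string; `responses` stands in for the stream yielded by receive().
async def collect_model_transcript(responses) -> str:
  transcript = ''
  async for r in responses:
    if r.partial and r.content and r.content.role == 'model':
      for part in r.content.parts or []:
        if part.text:
          transcript += part.text
  return transcript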
