Commit 7c2741b

Add home assistant cloud conversation (home-assistant#157090)

Parent: d6fb268

6 files changed: +392, -74 lines

homeassistant/components/cloud/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -80,6 +80,7 @@
 PLATFORMS = [
     Platform.AI_TASK,
     Platform.BINARY_SENSOR,
+    Platform.CONVERSATION,
     Platform.STT,
     Platform.TTS,
 ]
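
Note: adding Platform.CONVERSATION to PLATFORMS is what makes Home Assistant load the new conversation platform for the cloud config entry. A minimal sketch of the standard pattern that consumes this list (illustrative only; the cloud integration's real async_setup_entry is not shown in this commit):

# Illustrative only: how a PLATFORMS list like the one above is usually
# forwarded when a config entry is set up (standard Home Assistant pattern,
# assumed here, not copied from this commit).
from homeassistant.config_entries import ConfigEntry
from homeassistant.const import Platform
from homeassistant.core import HomeAssistant

# The list from the diff above, repeated so the sketch is self-contained.
PLATFORMS = [
    Platform.AI_TASK,
    Platform.BINARY_SENSOR,
    Platform.CONVERSATION,
    Platform.STT,
    Platform.TTS,
]


async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool:
    """Forward the config entry to every platform, now including conversation."""
    await hass.config_entries.async_forward_entry_setups(entry, PLATFORMS)
    return True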

homeassistant/components/cloud/const.py

Lines changed: 1 addition & 0 deletions
@@ -92,6 +92,7 @@
 STT_ENTITY_UNIQUE_ID = "cloud-speech-to-text"
 TTS_ENTITY_UNIQUE_ID = "cloud-text-to-speech"
 AI_TASK_ENTITY_UNIQUE_ID = "cloud-ai-task"
+CONVERSATION_ENTITY_UNIQUE_ID = "cloud-conversation-agent"

 LOGIN_MFA_TIMEOUT = 60

homeassistant/components/cloud/conversation.py

Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
+"""Conversation support for Home Assistant Cloud."""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from hass_nabucasa.llm import LLMError
+
+from homeassistant.components import conversation
+from homeassistant.config_entries import ConfigEntry
+from homeassistant.const import MATCH_ALL
+from homeassistant.core import HomeAssistant
+from homeassistant.helpers import llm
+from homeassistant.helpers.entity_platform import AddConfigEntryEntitiesCallback
+
+from .const import CONVERSATION_ENTITY_UNIQUE_ID, DATA_CLOUD, DOMAIN
+from .entity import BaseCloudLLMEntity
+
+
+async def async_setup_entry(
+    hass: HomeAssistant,
+    config_entry: ConfigEntry,
+    async_add_entities: AddConfigEntryEntitiesCallback,
+) -> None:
+    """Set up the Home Assistant Cloud conversation entity."""
+    cloud = hass.data[DATA_CLOUD]
+    try:
+        await cloud.llm.async_ensure_token()
+    except LLMError:
+        return
+
+    async_add_entities([CloudConversationEntity(cloud, config_entry)])
+
+
+class CloudConversationEntity(
+    conversation.ConversationEntity,
+    BaseCloudLLMEntity,
+):
+    """Home Assistant Cloud conversation agent."""
+
+    _attr_has_entity_name = True
+    _attr_name = "Home Assistant Cloud"
+    _attr_translation_key = "cloud_conversation"
+    _attr_unique_id = CONVERSATION_ENTITY_UNIQUE_ID
+    _attr_supported_features = conversation.ConversationEntityFeature.CONTROL
+
+    @property
+    def available(self) -> bool:
+        """Return if the entity is available."""
+        return self._cloud.is_logged_in and self._cloud.valid_subscription
+
+    @property
+    def supported_languages(self) -> list[str] | Literal["*"]:
+        """Return a list of supported languages."""
+        return MATCH_ALL
+
+    async def _async_handle_message(
+        self,
+        user_input: conversation.ConversationInput,
+        chat_log: conversation.ChatLog,
+    ) -> conversation.ConversationResult:
+        """Process a user input."""
+        try:
+            await chat_log.async_provide_llm_data(
+                user_input.as_llm_context(DOMAIN),
+                llm.LLM_API_ASSIST,
+                None,
+                user_input.extra_system_prompt,
+            )
+        except conversation.ConverseError as err:
+            return err.as_conversation_result()
+
+        await self._async_handle_chat_log("conversation", chat_log)
+
+        return conversation.async_get_result_from_chat_log(user_input, chat_log)
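
Note: a minimal sketch of how the new agent can be exercised from other async code once the entity is loaded, assuming the standard conversation.async_converse helper; the entity id conversation.home_assistant_cloud and the utterance are illustrative, not taken from this commit:

# Illustrative only: driving the cloud conversation agent from other async code
# (for example a test), assuming conversation.async_converse and an entity id of
# "conversation.home_assistant_cloud"; both are assumptions for this sketch.
from homeassistant.components import conversation
from homeassistant.core import Context, HomeAssistant


async def ask_cloud_agent(hass: HomeAssistant) -> conversation.ConversationResult:
    """Send one utterance to the cloud agent and return the result."""
    return await conversation.async_converse(
        hass,
        text="Turn on the kitchen light",
        conversation_id=None,
        context=Context(),
        agent_id="conversation.home_assistant_cloud",  # assumed entity id
    )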

homeassistant/components/cloud/entity.py

Lines changed: 120 additions & 48 deletions
@@ -1,7 +1,7 @@
 """Helpers for cloud LLM chat handling."""

 import base64
-from collections.abc import AsyncGenerator, Callable
+from collections.abc import AsyncGenerator, Callable, Iterable
 from enum import Enum
 import json
 import logging

@@ -16,13 +16,22 @@
     LLMResponseError,
     LLMServiceError,
 )
-from litellm import ResponseFunctionToolCall, ResponsesAPIStreamEvents
+from litellm import (
+    ResponseFunctionToolCall,
+    ResponseInputParam,
+    ResponsesAPIStreamEvents,
+)
 from openai.types.responses import (
     FunctionToolParam,
+    ResponseInputItemParam,
     ResponseReasoningItem,
     ToolParam,
     WebSearchToolParam,
 )
+from openai.types.responses.response_input_param import (
+    ImageGenerationCall as ImageGenerationCallParam,
+)
+from openai.types.responses.response_output_item import ImageGenerationCall
 import voluptuous as vol
 from voluptuous_openapi import convert

@@ -50,34 +59,97 @@ class ResponseItemType(str, Enum):
     IMAGE = "image"


-def _convert_content_to_chat_message(
-    content: conversation.Content,
-) -> dict[str, Any] | None:
-    """Convert ChatLog content to a responses message."""
-    if content.role not in ("user", "system", "tool_result", "assistant"):
-        return None
+def _convert_content_to_param(
+    chat_content: Iterable[conversation.Content],
+) -> ResponseInputParam:
+    """Convert any native chat message for this agent to the native format."""
+    messages: ResponseInputParam = []
+    reasoning_summary: list[str] = []
+    web_search_calls: dict[str, dict[str, Any]] = {}

-    text_content = cast(
-        conversation.SystemContent
-        | conversation.UserContent
-        | conversation.AssistantContent,
-        content,
-    )
+    for content in chat_content:
+        if isinstance(content, conversation.ToolResultContent):
+            if (
+                content.tool_name == "web_search_call"
+                and content.tool_call_id in web_search_calls
+            ):
+                web_search_call = web_search_calls.pop(content.tool_call_id)
+                web_search_call["status"] = content.tool_result.get(
+                    "status", "completed"
+                )
+                messages.append(cast("ResponseInputItemParam", web_search_call))
+            else:
+                messages.append(
+                    {
+                        "type": "function_call_output",
+                        "call_id": content.tool_call_id,
+                        "output": json.dumps(content.tool_result),
+                    }
+                )
+            continue
+
+        if content.content:
+            role: Literal["user", "assistant", "system", "developer"] = content.role
+            if role == "system":
+                role = "developer"
+            messages.append(
+                {"type": "message", "role": role, "content": content.content}
+            )
+
+        if isinstance(content, conversation.AssistantContent):
+            if content.tool_calls:
+                for tool_call in content.tool_calls:
+                    if (
+                        tool_call.external
+                        and tool_call.tool_name == "web_search_call"
+                        and "action" in tool_call.tool_args
+                    ):
+                        web_search_calls[tool_call.id] = {
+                            "type": "web_search_call",
+                            "id": tool_call.id,
+                            "action": tool_call.tool_args["action"],
+                            "status": "completed",
+                        }
+                    else:
+                        messages.append(
+                            {
+                                "type": "function_call",
+                                "name": tool_call.tool_name,
+                                "arguments": json.dumps(tool_call.tool_args),
+                                "call_id": tool_call.id,
+                            }
+                        )

-    if not text_content.content:
-        return None
+            if content.thinking_content:
+                reasoning_summary.append(content.thinking_content)

-    content_type = "output_text" if text_content.role == "assistant" else "input_text"
+            if isinstance(content.native, ResponseReasoningItem):
+                messages.append(
+                    {
+                        "type": "reasoning",
+                        "id": content.native.id,
+                        "summary": (
+                            [
+                                {
+                                    "type": "summary_text",
+                                    "text": summary,
+                                }
+                                for summary in reasoning_summary
+                            ]
+                            if content.thinking_content
+                            else []
+                        ),
+                        "encrypted_content": content.native.encrypted_content,
+                    }
+                )
+                reasoning_summary = []

-    return {
-        "role": text_content.role,
-        "content": [
-            {
-                "type": content_type,
-                "text": text_content.content,
-            }
-        ],
-    }
+            elif isinstance(content.native, ImageGenerationCall):
+                messages.append(
+                    cast(ImageGenerationCallParam, content.native.to_dict())
+                )
+
+    return messages


 def _format_tool(
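
Note: a hand-worked illustration of the mapping _convert_content_to_param performs for a small chat log (system prompt, user turn, assistant tool call, tool result). The concrete values (tool name HassTurnOn, call id, arguments) are assumptions for the sketch, not output captured from Home Assistant:

# Illustrative only: the Responses API input items that _convert_content_to_param
# would produce for a small chat log. Values are hand-written to mirror the
# mapping in the hunk above, not produced by running Home Assistant.
expected_input: list[dict] = [
    # SystemContent becomes a "developer" message.
    {"type": "message", "role": "developer", "content": "You are a voice assistant."},
    # UserContent becomes a "user" message.
    {"type": "message", "role": "user", "content": "Turn on the kitchen light"},
    # AssistantContent.tool_calls become function_call items (HassTurnOn is assumed).
    {
        "type": "function_call",
        "name": "HassTurnOn",
        "arguments": '{"name": "kitchen light"}',
        "call_id": "call_1",
    },
    # ToolResultContent becomes a function_call_output with a JSON-encoded result.
    {
        "type": "function_call_output",
        "call_id": "call_1",
        "output": '{"success": true}',
    },
]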
@@ -381,25 +453,16 @@ def __init__(self, cloud: Cloud[CloudClient], config_entry: ConfigEntry) -> None
     async def _prepare_chat_for_generation(
         self,
         chat_log: conversation.ChatLog,
+        messages: ResponseInputParam,
         response_format: dict[str, Any] | None = None,
     ) -> dict[str, Any]:
         """Prepare kwargs for Cloud LLM from the chat log."""

-        messages = [
-            message
-            for content in chat_log.content
-            if (message := _convert_content_to_chat_message(content))
-        ]
-
-        if not messages or messages[-1]["role"] != "user":
-            raise HomeAssistantError("No user prompt found")
-
-        last_content = chat_log.content[-1]
+        last_content: Any = chat_log.content[-1]
         if last_content.role == "user" and last_content.attachments:
             files = await self._async_prepare_files_for_prompt(last_content.attachments)
-            user_message = messages[-1]
-            current_content = user_message.get("content", [])
-            user_message["content"] = [*(current_content or []), *files]
+            current_content = last_content.content
+            last_content = [*(current_content or []), *files]

         tools: list[ToolParam] = []
         tool_choice: str | None = None
@@ -503,8 +566,11 @@ async def _async_handle_chat_log(
             },
         }

+        messages = _convert_content_to_param(chat_log.content)
+
         response_kwargs = await self._prepare_chat_for_generation(
             chat_log,
+            messages,
             response_format,
         )

@@ -518,15 +584,21 @@ async def _async_handle_chat_log(
                 **response_kwargs,
             )

-            async for _ in chat_log.async_add_delta_content_stream(
-                agent_id=self.entity_id,
-                stream=_transform_stream(
-                    chat_log,
-                    raw_stream,
-                    True,
-                ),
-            ):
-                pass
+            messages.extend(
+                _convert_content_to_param(
+                    [
+                        content
+                        async for content in chat_log.async_add_delta_content_stream(
+                            self.entity_id,
+                            _transform_stream(
+                                chat_log,
+                                raw_stream,
+                                True,
+                            ),
+                        )
+                    ]
+                )
+            )

         except LLMAuthenticationError as err:
             raise ConfigEntryAuthFailed("Cloud LLM authentication failed") from err
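
Note: the last hunk stops draining the delta stream with "async for _ in ...: pass" and instead collects the streamed content so it can be converted and appended to messages, keeping the running request history complete for any follow-up call. A self-contained toy sketch of that collect-and-extend pattern (every name here is a stand-in, not code from this commit):

# Illustrative only: the collect-and-extend pattern adopted above, shown with
# stand-in data. fake_delta_stream plays the role of
# chat_log.async_add_delta_content_stream(...); nothing here is Home Assistant code.
import asyncio
from collections.abc import AsyncGenerator


async def fake_delta_stream() -> AsyncGenerator[dict, None]:
    """Yield a couple of stand-in content items, like a streamed model response."""
    yield {"role": "assistant", "content": "Turning on the kitchen light."}
    yield {"role": "tool_result", "output": '{"success": true}'}


async def main() -> None:
    # Running request history; in the real code this is the ResponseInputParam list.
    messages: list[dict] = [{"role": "user", "content": "Turn on the kitchen light"}]
    # Collect everything the stream yields instead of discarding it, then extend
    # the history so a follow-up request sees the new turns as well.
    new_items = [item async for item in fake_delta_stream()]
    messages.extend(new_items)
    print(messages)


asyncio.run(main())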
