Skip to content

Commit b7fb932

Browse files
simonrosenberg, openhands-agent, and Debug Agent
authored
feat: add title_llm_profile support for auto-titling (#2515)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Debug Agent <debug@example.com>
1 parent 5273c5d commit b7fb932

File tree

5 files changed

+309
-34
lines changed

5 files changed

+309
-34
lines changed

openhands-agent-server/openhands/agent_server/conversation_service.py

Lines changed: 32 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -827,8 +827,13 @@ async def __call__(self, event: Event) -> None:
827827
if not message_text:
828828
return
829829

830-
conversation = self.service._conversation
831-
llm = conversation.agent.llm if conversation else None
830+
# Precedence: title_llm_profile (if configured and loads) → agent.llm →
831+
# truncation. This keeps auto-titling non-breaking for consumers who
832+
# don't configure title_llm_profile.
833+
title_llm = self._load_title_llm()
834+
if title_llm is None:
835+
conversation = self.service._conversation
836+
title_llm = conversation.agent.llm if conversation else None
832837

833838
async def _generate_and_save() -> None:
834839
try:
@@ -837,7 +842,7 @@ async def _generate_and_save() -> None:
837842
None,
838843
generate_title_from_message,
839844
message_text,
840-
llm,
845+
title_llm,
841846
50,
842847
)
843848
if title and self.service.stored.title is None:
@@ -853,6 +858,30 @@ async def _generate_and_save() -> None:
853858

854859
asyncio.create_task(_generate_and_save())
855860

861+
def _load_title_llm(self) -> LLM | None:
862+
"""Load the LLM for title generation from profile store.
863+
864+
Returns:
865+
LLM instance if title_llm_profile is configured and loads
866+
successfully, None otherwise. When None is returned, the caller
867+
falls back to the agent's LLM (and then to message truncation).
868+
"""
869+
profile_name = self.service.stored.title_llm_profile
870+
if not profile_name:
871+
return None
872+
873+
try:
874+
from openhands.sdk.llm.llm_profile_store import LLMProfileStore
875+
876+
profile_store = LLMProfileStore()
877+
return profile_store.load(profile_name)
878+
except (FileNotFoundError, ValueError) as e:
879+
logger.warning(
880+
f"Failed to load title LLM profile '{profile_name}': {e}. "
881+
"Falling back to the agent's LLM."
882+
)
883+
return None
884+
856885

857886
@dataclass
858887
class WebhookSubscriber(Subscriber):

openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -931,9 +931,13 @@ def ask_agent(self, question: str) -> str:
931931
def generate_title(self, llm: LLM | None = None, max_length: int = 50) -> str:
932932
"""Generate a title for the conversation based on the first user message.
933933
934+
If an explicit LLM is provided, it takes precedence. Otherwise the
935+
agent's LLM is used. If neither is available, the title falls back to
936+
simple message truncation.
937+
934938
Args:
935-
llm: Optional LLM to use for title generation. If not provided,
936-
uses self.agent.llm.
939+
llm: Optional LLM to use for title generation. Takes precedence
940+
over the agent's LLM when provided.
937941
max_length: Maximum length of the generated title.
938942
939943
Returns:
@@ -942,16 +946,9 @@ def generate_title(self, llm: LLM | None = None, max_length: int = 50) -> str:
942946
Raises:
943947
ValueError: If no user messages are found in the conversation.
944948
"""
945-
# Use provided LLM or fall back to agent's LLM
946-
llm_to_use = llm or self.agent.llm
947-
948-
# Skip LLM-based title generation for ACP agents with sentinel LLM
949-
# The sentinel model "acp-managed" cannot make LLM calls directly
950-
if llm_to_use.model == "acp-managed":
951-
llm_to_use = None
952-
949+
effective_llm = llm if llm is not None else self.agent.llm
953950
return generate_conversation_title(
954-
events=self._state.events, llm=llm_to_use, max_length=max_length
951+
events=self._state.events, llm=effective_llm, max_length=max_length
955952
)
956953

957954
def condense(self) -> None:

openhands-sdk/openhands/sdk/conversation/request.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,19 @@ class _StartConversationRequestBase(BaseModel):
146146
default=True,
147147
description=(
148148
"If true, automatically generate a title for the conversation from "
149-
"the first user message using the conversation's LLM."
149+
"the first user message. Precedence: title_llm_profile (if set and "
150+
"loads) → agent.llm → message truncation."
151+
),
152+
)
153+
title_llm_profile: str | None = Field(
154+
default=None,
155+
description=(
156+
"Optional LLM profile name for title generation. If set, the LLM "
157+
"is loaded from LLMProfileStore (~/.openhands/profiles/) and used "
158+
"for LLM-based title generation. This enables using a fast/cheap "
159+
"model for titles regardless of the agent's main model. If not "
160+
"set (or profile loading fails), title generation falls back to "
161+
"the agent's LLM."
150162
),
151163
)
152164

tests/agent_server/test_conversation_service.py

Lines changed: 212 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1656,7 +1656,10 @@ class TestAutoTitle:
16561656
)
16571657

16581658
def _make_service(
1659-
self, title: str | None = None, llm_model: str = "gpt-4o"
1659+
self,
1660+
title: str | None = None,
1661+
title_llm_profile: str | None = None,
1662+
llm_model: str = "gpt-4o",
16601663
) -> AsyncMock:
16611664
stored = StoredConversation(
16621665
id=uuid4(),
@@ -1666,6 +1669,7 @@ def _make_service(
16661669
initial_message=None,
16671670
metrics=None,
16681671
title=title,
1672+
title_llm_profile=title_llm_profile,
16691673
)
16701674
service = AsyncMock(spec=EventService)
16711675
service.stored = stored
@@ -1684,6 +1688,22 @@ def _user_message_event(self, text: str = "Fix the login bug") -> MessageEvent:
16841688
llm_message=Message(role="user", content=[TextContent(text=text)]),
16851689
)
16861690

1691+
@staticmethod
1692+
async def _drain_title_task(
1693+
predicate=lambda: True, max_iterations: int = 50, step: float = 0.02
1694+
) -> None:
1695+
"""Yield to the event loop until the background title task completes.
1696+
1697+
`AutoTitleSubscriber` schedules generation via `run_in_executor`, so a
1698+
single `await asyncio.sleep(0)` is not enough to let the executor
1699+
thread finish. Poll with a short sleep until `predicate()` becomes
1700+
truthy or the timeout elapses.
1701+
"""
1702+
for _ in range(max_iterations):
1703+
await asyncio.sleep(step)
1704+
if predicate():
1705+
return
1706+
16871707
@pytest.mark.asyncio
16881708
async def test_autotitle_sets_title_on_first_user_message(self):
16891709
"""Title is generated and saved when the first user message arrives."""
@@ -1764,18 +1784,207 @@ async def test_autotitle_skips_empty_message(self):
17641784

17651785
assert service.stored.title is None
17661786

1787+
@pytest.mark.asyncio
1788+
async def test_autotitle_uses_llm_profile_when_configured(self):
1789+
"""Profile LLM takes precedence over agent.llm when configured."""
1790+
service = self._make_service(title_llm_profile="cheap-model")
1791+
mock_llm = LLM(model="gpt-3.5-turbo", usage_id="title-llm")
1792+
1793+
with (
1794+
patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore,
1795+
patch(
1796+
self._GENERATE_TITLE_PATH, return_value="✨ Profile LLM Title"
1797+
) as mock_generate_title,
1798+
):
1799+
mock_store_instance = MockStore.return_value
1800+
mock_store_instance.load.return_value = mock_llm
1801+
1802+
subscriber = AutoTitleSubscriber(service=service)
1803+
await subscriber(self._user_message_event())
1804+
await self._drain_title_task(lambda: service.stored.title is not None)
1805+
1806+
MockStore.assert_called_once_with()
1807+
mock_store_instance.load.assert_called_once_with("cheap-model")
1808+
# Profile-loaded LLM wins over agent.llm
1809+
assert mock_generate_title.called
1810+
assert mock_generate_title.call_args.args[1] is mock_llm
1811+
1812+
assert service.stored.title == "✨ Profile LLM Title"
1813+
service.save_meta.assert_called_once()
1814+
1815+
@pytest.mark.asyncio
1816+
async def test_autotitle_falls_back_to_agent_llm_when_profile_not_found(self):
1817+
"""Missing profile → fall back to agent.llm (non-breaking behavior)."""
1818+
service = self._make_service(title_llm_profile="nonexistent-profile")
1819+
agent_llm = service._conversation.agent.llm
1820+
1821+
with (
1822+
patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore,
1823+
patch(
1824+
self._GENERATE_TITLE_PATH, return_value="✨ Agent LLM Title"
1825+
) as mock_generate_title,
1826+
):
1827+
mock_store_instance = MockStore.return_value
1828+
mock_store_instance.load.side_effect = FileNotFoundError(
1829+
"Profile 'nonexistent-profile' not found"
1830+
)
1831+
1832+
subscriber = AutoTitleSubscriber(service=service)
1833+
await subscriber(self._user_message_event())
1834+
await self._drain_title_task(lambda: service.stored.title is not None)
1835+
1836+
# Failed profile load → falls back to agent.llm
1837+
assert mock_generate_title.called
1838+
assert mock_generate_title.call_args.args[1] is agent_llm
1839+
1840+
assert service.stored.title == "✨ Agent LLM Title"
1841+
service.save_meta.assert_called_once()
1842+
1843+
@pytest.mark.asyncio
1844+
async def test_autotitle_no_profile_uses_agent_llm(self):
1845+
"""No profile configured → use agent.llm (preserves existing behavior)."""
1846+
service = self._make_service(title_llm_profile=None)
1847+
agent_llm = service._conversation.agent.llm
1848+
1849+
with patch(
1850+
self._GENERATE_TITLE_PATH, return_value="✨ Agent LLM Title"
1851+
) as mock_generate_title:
1852+
subscriber = AutoTitleSubscriber(service=service)
1853+
await subscriber(self._user_message_event())
1854+
await self._drain_title_task(lambda: service.stored.title is not None)
1855+
1856+
# No profile → agent.llm is used (backwards compatible)
1857+
assert mock_generate_title.called
1858+
assert mock_generate_title.call_args.args[1] is agent_llm
1859+
1860+
assert service.stored.title == "✨ Agent LLM Title"
1861+
service.save_meta.assert_called_once()
1862+
1863+
@pytest.mark.asyncio
1864+
async def test_autotitle_handles_profile_load_value_error(self):
1865+
"""Profile load ValueError → fall back to agent.llm."""
1866+
service = self._make_service(title_llm_profile="corrupted-profile")
1867+
agent_llm = service._conversation.agent.llm
1868+
1869+
with (
1870+
patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore,
1871+
patch(
1872+
self._GENERATE_TITLE_PATH, return_value="✨ Agent LLM Title"
1873+
) as mock_generate_title,
1874+
):
1875+
mock_store_instance = MockStore.return_value
1876+
mock_store_instance.load.side_effect = ValueError("Invalid profile format")
1877+
1878+
subscriber = AutoTitleSubscriber(service=service)
1879+
await subscriber(self._user_message_event())
1880+
await self._drain_title_task(lambda: service.stored.title is not None)
1881+
1882+
assert mock_generate_title.called
1883+
assert mock_generate_title.call_args.args[1] is agent_llm
1884+
1885+
assert service.stored.title == "✨ Agent LLM Title"
1886+
service.save_meta.assert_called_once()
1887+
17671888
@pytest.mark.asyncio
17681889
async def test_autotitle_falls_back_for_acp_managed_llm(self):
1769-
"""ACP-managed agents should skip LLM title generation and fall back."""
1890+
"""ACP-managed agents with no title profile → truncation fallback."""
17701891
service = self._make_service(llm_model="acp-managed")
17711892
subscriber = AutoTitleSubscriber(service=service)
17721893

17731894
await subscriber(self._user_message_event("Fix the login bug"))
1774-
await asyncio.sleep(0)
1895+
await self._drain_title_task(lambda: service.stored.title is not None)
17751896

17761897
assert service.stored.title == "Fix the login bug"
17771898
service.save_meta.assert_called_once()
17781899

1900+
@pytest.mark.asyncio
1901+
async def test_autotitle_integration_routes_through_profile_store(self, tmp_path):
1902+
"""End-to-end: profile on disk → LLMProfileStore.load → title LLM call.
1903+
1904+
Exercises the real wiring from AutoTitleSubscriber through LLMProfileStore
1905+
to LLM.completion. Only the network boundary (LLM.completion) is mocked,
1906+
so this catches regressions in profile loading, LLM passthrough, and the
1907+
agent-server → SDK integration — the unit tests above only exercise
1908+
AutoTitleSubscriber in isolation.
1909+
"""
1910+
from litellm.types.utils import (
1911+
Choices,
1912+
Message as LiteLLMMessage,
1913+
ModelResponse,
1914+
Usage,
1915+
)
1916+
1917+
from openhands.sdk.llm import LLMResponse, MetricsSnapshot
1918+
from openhands.sdk.llm.llm_profile_store import LLMProfileStore
1919+
1920+
# Persist a real LLM profile to disk with a distinctive usage_id so we
1921+
# can tell the title LLM apart from the agent's LLM in the assertion.
1922+
profile_dir = tmp_path / "profiles"
1923+
title_llm_on_disk = LLM(
1924+
usage_id="title-llm",
1925+
model="claude-haiku-4-5",
1926+
api_key=SecretStr("title-key"),
1927+
)
1928+
LLMProfileStore(base_dir=profile_dir).save(
1929+
"title-fast", title_llm_on_disk, include_secrets=True
1930+
)
1931+
1932+
service = self._make_service(title_llm_profile="title-fast")
1933+
1934+
calls: list[str] = []
1935+
1936+
def fake_completion(self_llm, _messages, **_kwargs):
1937+
calls.append(self_llm.usage_id)
1938+
msg = LiteLLMMessage(content="✨ Generated", role="assistant")
1939+
choice = Choices(finish_reason="stop", index=0, message=msg)
1940+
raw = ModelResponse(
1941+
id="resp-1",
1942+
choices=[choice],
1943+
created=0,
1944+
model=self_llm.model,
1945+
object="chat.completion",
1946+
usage=Usage(prompt_tokens=1, completion_tokens=1, total_tokens=2),
1947+
)
1948+
return LLMResponse(
1949+
message=Message.from_llm_chat_message(choice["message"]),
1950+
metrics=MetricsSnapshot(
1951+
model_name=self_llm.model,
1952+
accumulated_cost=0.0,
1953+
max_budget_per_task=None,
1954+
accumulated_token_usage=None,
1955+
),
1956+
raw_response=raw,
1957+
)
1958+
1959+
# Point LLMProfileStore() (no args) at our tmp dir so the real
1960+
# _load_title_llm code path finds our on-disk profile.
1961+
with (
1962+
patch(
1963+
"openhands.sdk.llm.llm_profile_store._DEFAULT_PROFILE_DIR", profile_dir
1964+
),
1965+
patch(
1966+
"openhands.sdk.llm.llm.LLM.completion",
1967+
autospec=True,
1968+
side_effect=fake_completion,
1969+
),
1970+
):
1971+
subscriber = AutoTitleSubscriber(service=service)
1972+
await subscriber(self._user_message_event("Fix the login bug"))
1973+
# Wait for the background executor task to complete. The production
1974+
# code uses run_in_executor, so sleep(0) is not enough.
1975+
for _ in range(50):
1976+
await asyncio.sleep(0.02)
1977+
if service.stored.title is not None:
1978+
break
1979+
1980+
# The profile's LLM (usage_id="title-llm") was called — not agent.llm
1981+
# (usage_id="test-llm"). This is the regression-sensitive assertion.
1982+
assert calls == ["title-llm"], (
1983+
f"Expected only the title profile LLM to be called, got: {calls}"
1984+
)
1985+
assert service.stored.title == "✨ Generated"
1986+
service.save_meta.assert_called_once()
1987+
17791988

17801989
class TestACPActivityHeartbeatWiring:
17811990
"""Tests for _setup_acp_activity_heartbeat in EventService."""

0 commit comments

Comments (0)