diff --git a/openhands-agent-server/openhands/agent_server/conversation_service.py b/openhands-agent-server/openhands/agent_server/conversation_service.py index 3cc2bcbb68..067e98502d 100644 --- a/openhands-agent-server/openhands/agent_server/conversation_service.py +++ b/openhands-agent-server/openhands/agent_server/conversation_service.py @@ -827,8 +827,13 @@ async def __call__(self, event: Event) -> None: if not message_text: return - conversation = self.service._conversation - llm = conversation.agent.llm if conversation else None + # Precedence: title_llm_profile (if configured and loads) → agent.llm → + # truncation. This keeps auto-titling non-breaking for consumers who + # don't configure title_llm_profile. + title_llm = self._load_title_llm() + if title_llm is None: + conversation = self.service._conversation + title_llm = conversation.agent.llm if conversation else None async def _generate_and_save() -> None: try: @@ -837,7 +842,7 @@ async def _generate_and_save() -> None: None, generate_title_from_message, message_text, - llm, + title_llm, 50, ) if title and self.service.stored.title is None: @@ -853,6 +858,30 @@ async def _generate_and_save() -> None: asyncio.create_task(_generate_and_save()) + def _load_title_llm(self) -> LLM | None: + """Load the LLM for title generation from profile store. + + Returns: + LLM instance if title_llm_profile is configured and loads + successfully, None otherwise. When None is returned, the caller + falls back to the agent's LLM (and then to message truncation). + """ + profile_name = self.service.stored.title_llm_profile + if not profile_name: + return None + + try: + from openhands.sdk.llm.llm_profile_store import LLMProfileStore + + profile_store = LLMProfileStore() + return profile_store.load(profile_name) + except (FileNotFoundError, ValueError) as e: + logger.warning( + f"Failed to load title LLM profile '{profile_name}': {e}. " + "Falling back to the agent's LLM." 
+ ) + return None + @dataclass class WebhookSubscriber(Subscriber): diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 99c794d1ff..a2b6b3438e 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -931,9 +931,13 @@ def ask_agent(self, question: str) -> str: def generate_title(self, llm: LLM | None = None, max_length: int = 50) -> str: """Generate a title for the conversation based on the first user message. + If an explicit LLM is provided, it takes precedence. Otherwise the + agent's LLM is used. If neither is available, the title falls back to + simple message truncation. + Args: - llm: Optional LLM to use for title generation. If not provided, - uses self.agent.llm. + llm: Optional LLM to use for title generation. Takes precedence + over the agent's LLM when provided. max_length: Maximum length of the generated title. Returns: @@ -942,16 +946,9 @@ def generate_title(self, llm: LLM | None = None, max_length: int = 50) -> str: Raises: ValueError: If no user messages are found in the conversation. 
""" - # Use provided LLM or fall back to agent's LLM - llm_to_use = llm or self.agent.llm - - # Skip LLM-based title generation for ACP agents with sentinel LLM - # The sentinel model "acp-managed" cannot make LLM calls directly - if llm_to_use.model == "acp-managed": - llm_to_use = None - + effective_llm = llm if llm is not None else self.agent.llm return generate_conversation_title( - events=self._state.events, llm=llm_to_use, max_length=max_length + events=self._state.events, llm=effective_llm, max_length=max_length ) def condense(self) -> None: diff --git a/openhands-sdk/openhands/sdk/conversation/request.py b/openhands-sdk/openhands/sdk/conversation/request.py index 6472899758..f8d7641834 100644 --- a/openhands-sdk/openhands/sdk/conversation/request.py +++ b/openhands-sdk/openhands/sdk/conversation/request.py @@ -146,7 +146,19 @@ class _StartConversationRequestBase(BaseModel): default=True, description=( "If true, automatically generate a title for the conversation from " - "the first user message using the conversation's LLM." + "the first user message. Precedence: title_llm_profile (if set and " + "loads) → agent.llm → message truncation." + ), + ) + title_llm_profile: str | None = Field( + default=None, + description=( + "Optional LLM profile name for title generation. If set, the LLM " + "is loaded from LLMProfileStore (~/.openhands/profiles/) and used " + "for LLM-based title generation. This enables using a fast/cheap " + "model for titles regardless of the agent's main model. If not " + "set (or profile loading fails), title generation falls back to " + "the agent's LLM." 
), ) diff --git a/tests/agent_server/test_conversation_service.py b/tests/agent_server/test_conversation_service.py index c66cdf2ea4..9b8d28a032 100644 --- a/tests/agent_server/test_conversation_service.py +++ b/tests/agent_server/test_conversation_service.py @@ -1656,7 +1656,10 @@ class TestAutoTitle: ) def _make_service( - self, title: str | None = None, llm_model: str = "gpt-4o" + self, + title: str | None = None, + title_llm_profile: str | None = None, + llm_model: str = "gpt-4o", ) -> AsyncMock: stored = StoredConversation( id=uuid4(), @@ -1666,6 +1669,7 @@ def _make_service( initial_message=None, metrics=None, title=title, + title_llm_profile=title_llm_profile, ) service = AsyncMock(spec=EventService) service.stored = stored @@ -1684,6 +1688,22 @@ def _user_message_event(self, text: str = "Fix the login bug") -> MessageEvent: llm_message=Message(role="user", content=[TextContent(text=text)]), ) + @staticmethod + async def _drain_title_task( + predicate=lambda: True, max_iterations: int = 50, step: float = 0.02 + ) -> None: + """Yield to the event loop until the background title task completes. + + `AutoTitleSubscriber` schedules generation via `run_in_executor`, so a + single `await asyncio.sleep(0)` is not enough to let the executor + thread finish. Poll with a short sleep until `predicate()` becomes + truthy or `max_iterations` polls of `step` seconds are exhausted. 
+ """ + for _ in range(max_iterations): + await asyncio.sleep(step) + if predicate(): + return + @pytest.mark.asyncio async def test_autotitle_sets_title_on_first_user_message(self): """Title is generated and saved when the first user message arrives.""" @@ -1764,18 +1784,207 @@ async def test_autotitle_skips_empty_message(self): assert service.stored.title is None + @pytest.mark.asyncio + async def test_autotitle_uses_llm_profile_when_configured(self): + """Profile LLM takes precedence over agent.llm when configured.""" + service = self._make_service(title_llm_profile="cheap-model") + mock_llm = LLM(model="gpt-3.5-turbo", usage_id="title-llm") + + with ( + patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore, + patch( + self._GENERATE_TITLE_PATH, return_value="✨ Profile LLM Title" + ) as mock_generate_title, + ): + mock_store_instance = MockStore.return_value + mock_store_instance.load.return_value = mock_llm + + subscriber = AutoTitleSubscriber(service=service) + await subscriber(self._user_message_event()) + await self._drain_title_task(lambda: service.stored.title is not None) + + MockStore.assert_called_once_with() + mock_store_instance.load.assert_called_once_with("cheap-model") + # Profile-loaded LLM wins over agent.llm + assert mock_generate_title.called + assert mock_generate_title.call_args.args[1] is mock_llm + + assert service.stored.title == "✨ Profile LLM Title" + service.save_meta.assert_called_once() + + @pytest.mark.asyncio + async def test_autotitle_falls_back_to_agent_llm_when_profile_not_found(self): + """Missing profile → fall back to agent.llm (non-breaking behavior).""" + service = self._make_service(title_llm_profile="nonexistent-profile") + agent_llm = service._conversation.agent.llm + + with ( + patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore, + patch( + self._GENERATE_TITLE_PATH, return_value="✨ Agent LLM Title" + ) as mock_generate_title, + ): + mock_store_instance = MockStore.return_value + 
mock_store_instance.load.side_effect = FileNotFoundError( + "Profile 'nonexistent-profile' not found" + ) + + subscriber = AutoTitleSubscriber(service=service) + await subscriber(self._user_message_event()) + await self._drain_title_task(lambda: service.stored.title is not None) + + # Failed profile load → falls back to agent.llm + assert mock_generate_title.called + assert mock_generate_title.call_args.args[1] is agent_llm + + assert service.stored.title == "✨ Agent LLM Title" + service.save_meta.assert_called_once() + + @pytest.mark.asyncio + async def test_autotitle_no_profile_uses_agent_llm(self): + """No profile configured → use agent.llm (preserves existing behavior).""" + service = self._make_service(title_llm_profile=None) + agent_llm = service._conversation.agent.llm + + with patch( + self._GENERATE_TITLE_PATH, return_value="✨ Agent LLM Title" + ) as mock_generate_title: + subscriber = AutoTitleSubscriber(service=service) + await subscriber(self._user_message_event()) + await self._drain_title_task(lambda: service.stored.title is not None) + + # No profile → agent.llm is used (backwards compatible) + assert mock_generate_title.called + assert mock_generate_title.call_args.args[1] is agent_llm + + assert service.stored.title == "✨ Agent LLM Title" + service.save_meta.assert_called_once() + + @pytest.mark.asyncio + async def test_autotitle_handles_profile_load_value_error(self): + """Profile load ValueError → fall back to agent.llm.""" + service = self._make_service(title_llm_profile="corrupted-profile") + agent_llm = service._conversation.agent.llm + + with ( + patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore, + patch( + self._GENERATE_TITLE_PATH, return_value="✨ Agent LLM Title" + ) as mock_generate_title, + ): + mock_store_instance = MockStore.return_value + mock_store_instance.load.side_effect = ValueError("Invalid profile format") + + subscriber = AutoTitleSubscriber(service=service) + await 
subscriber(self._user_message_event()) + await self._drain_title_task(lambda: service.stored.title is not None) + + assert mock_generate_title.called + assert mock_generate_title.call_args.args[1] is agent_llm + + assert service.stored.title == "✨ Agent LLM Title" + service.save_meta.assert_called_once() + @pytest.mark.asyncio async def test_autotitle_falls_back_for_acp_managed_llm(self): - """ACP-managed agents should skip LLM title generation and fall back.""" + """ACP-managed agents with no title profile → truncation fallback.""" service = self._make_service(llm_model="acp-managed") subscriber = AutoTitleSubscriber(service=service) await subscriber(self._user_message_event("Fix the login bug")) - await asyncio.sleep(0) + await self._drain_title_task(lambda: service.stored.title is not None) assert service.stored.title == "Fix the login bug" service.save_meta.assert_called_once() + @pytest.mark.asyncio + async def test_autotitle_integration_routes_through_profile_store(self, tmp_path): + """End-to-end: profile on disk → LLMProfileStore.load → title LLM call. + + Exercises the real wiring from AutoTitleSubscriber through LLMProfileStore + to LLM.completion. Only the network boundary (LLM.completion) is mocked, + so this catches regressions in profile loading, LLM passthrough, and the + agent-server → SDK integration — the unit tests above only exercise + AutoTitleSubscriber in isolation. + """ + from litellm.types.utils import ( + Choices, + Message as LiteLLMMessage, + ModelResponse, + Usage, + ) + + from openhands.sdk.llm import LLMResponse, MetricsSnapshot + from openhands.sdk.llm.llm_profile_store import LLMProfileStore + + # Persist a real LLM profile to disk with a distinctive usage_id so we + # can tell the title LLM apart from the agent's LLM in the assertion. 
+ profile_dir = tmp_path / "profiles" + title_llm_on_disk = LLM( + usage_id="title-llm", + model="claude-haiku-4-5", + api_key=SecretStr("title-key"), + ) + LLMProfileStore(base_dir=profile_dir).save( + "title-fast", title_llm_on_disk, include_secrets=True + ) + + service = self._make_service(title_llm_profile="title-fast") + + calls: list[str] = [] + + def fake_completion(self_llm, _messages, **_kwargs): + calls.append(self_llm.usage_id) + msg = LiteLLMMessage(content="✨ Generated", role="assistant") + choice = Choices(finish_reason="stop", index=0, message=msg) + raw = ModelResponse( + id="resp-1", + choices=[choice], + created=0, + model=self_llm.model, + object="chat.completion", + usage=Usage(prompt_tokens=1, completion_tokens=1, total_tokens=2), + ) + return LLMResponse( + message=Message.from_llm_chat_message(choice["message"]), + metrics=MetricsSnapshot( + model_name=self_llm.model, + accumulated_cost=0.0, + max_budget_per_task=None, + accumulated_token_usage=None, + ), + raw_response=raw, + ) + + # Point LLMProfileStore() (no args) at our tmp dir so the real + # _load_title_llm code path finds our on-disk profile. + with ( + patch( + "openhands.sdk.llm.llm_profile_store._DEFAULT_PROFILE_DIR", profile_dir + ), + patch( + "openhands.sdk.llm.llm.LLM.completion", + autospec=True, + side_effect=fake_completion, + ), + ): + subscriber = AutoTitleSubscriber(service=service) + await subscriber(self._user_message_event("Fix the login bug")) + # Wait for the background executor task to complete. The production + # code uses run_in_executor, so sleep(0) is not enough. + for _ in range(50): + await asyncio.sleep(0.02) + if service.stored.title is not None: + break + + # The profile's LLM (usage_id="title-llm") was called — not agent.llm + # (usage_id="test-llm"). This is the regression-sensitive assertion. 
+ assert calls == ["title-llm"], ( + f"Expected only the title profile LLM to be called, got: {calls}" + ) + assert service.stored.title == "✨ Generated" + service.save_meta.assert_called_once() + class TestACPActivityHeartbeatWiring: """Tests for _setup_acp_activity_heartbeat in EventService.""" diff --git a/tests/sdk/conversation/test_generate_title.py b/tests/sdk/conversation/test_generate_title.py index 8f6628322b..00b65e8a59 100644 --- a/tests/sdk/conversation/test_generate_title.py +++ b/tests/sdk/conversation/test_generate_title.py @@ -67,23 +67,22 @@ def create_mock_llm_response(content: str) -> LLMResponse: @patch("openhands.sdk.llm.llm.LLM.completion") -def test_generate_title_basic(mock_completion): - """Test basic generate_title functionality.""" +def test_generate_title_without_llm_uses_agent_llm(mock_completion): + """Without an explicit LLM, generate_title falls back to the agent's LLM. + + This preserves backwards-compatible behavior for callers that don't + configure a dedicated title LLM. 
+ """ agent = create_test_agent() conv = Conversation(agent=agent, visualizer=None) - # Add a user message to the conversation user_message = create_user_message_event("Help me create a Python script") conv.state.events.append(user_message) - # Mock the LLM response - mock_response = create_mock_llm_response("Create Python Script") - mock_completion.return_value = mock_response + mock_completion.return_value = create_mock_llm_response("Create Python Script") - # Generate title title = conv.generate_title() - # Verify the title was generated assert title == "Create Python Script" mock_completion.assert_called_once() @@ -112,19 +111,42 @@ def test_generate_title_llm_error_fallback(mock_completion): user_message = create_user_message_event("Fix the bug in my application") conv.state.events.append(user_message) + # Create an LLM to pass explicitly + custom_llm = LLM(model="gpt-4o-mini", api_key=SecretStr("key"), usage_id="err") + # Mock the LLM to raise an exception mock_completion.side_effect = Exception("LLM error") - # Generate title (should fall back to truncation) - title = conv.generate_title() + # Generate title with explicit LLM (should fall back to truncation on error) + title = conv.generate_title(llm=custom_llm) # Verify fallback title was generated assert title == "Fix the bug in my application" @patch("openhands.sdk.llm.llm.LLM.completion") -def test_generate_title_with_max_length(mock_completion): - """Test generate_title respects max_length parameter.""" +def test_generate_title_truncation_respects_max_length(mock_completion): + """When LLM fails, truncation fallback respects max_length.""" + agent = create_test_agent() + conv = Conversation(agent=agent, visualizer=None) + + # Add a user message that is longer than max_length + long_message = "Create a web application with advanced features and database" + user_message = create_user_message_event(long_message) + conv.state.events.append(user_message) + + # Force LLM failure to exercise the truncation 
fallback path + mock_completion.side_effect = Exception("LLM error") + + title = conv.generate_title(max_length=20) + + assert len(title) <= 20 + assert title.endswith("...") + + +@patch("openhands.sdk.llm.llm.LLM.completion") +def test_generate_title_with_llm_truncates_long_response(mock_completion): + """Test generate_title truncates long LLM responses to max_length.""" agent = create_test_agent() conv = Conversation(agent=agent, visualizer=None) @@ -132,14 +154,17 @@ def test_generate_title_with_max_length(mock_completion): user_message = create_user_message_event("Create a web application") conv.state.events.append(user_message) + # Create an LLM to pass explicitly + custom_llm = LLM(model="gpt-4o-mini", api_key=SecretStr("key"), usage_id="test") + # Mock the LLM response with a long title mock_response = create_mock_llm_response( "Create a Complex Web Application with Database" ) mock_completion.return_value = mock_response - # Generate title with max_length=20 - title = conv.generate_title(max_length=20) + # Generate title with max_length=20 and explicit LLM + title = conv.generate_title(llm=custom_llm, max_length=20) # Verify the title was truncated assert len(title) <= 20 @@ -182,13 +207,16 @@ def test_generate_title_empty_llm_response_fallback(mock_completion): user_message = create_user_message_event("Help with testing") conv.state.events.append(user_message) + # Create an LLM to pass explicitly + custom_llm = LLM(model="gpt-4o-mini", api_key=SecretStr("key"), usage_id="empty") + # Mock the LLM response with empty content mock_response = MagicMock() mock_response.choices = [] mock_completion.return_value = mock_response - # Generate title (should fall back to truncation) - title = conv.generate_title() + # Generate title with explicit LLM (falls back to truncation on empty response) + title = conv.generate_title(llm=custom_llm) # Verify fallback title was generated assert title == "Help with testing"