From d542c7758005bd0b6eb6259613d57943d48fd90d Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 19 Mar 2026 17:28:03 +0000 Subject: [PATCH 1/4] feat: add title_llm_profile support for auto-titling Add a new title_llm_profile configuration option that allows specifying a dedicated LLM profile for conversation title generation, decoupling it from the agent's LLM. Changes: - Add title_llm_profile field to _StartConversationRequestBase in models.py - Update AutoTitleSubscriber to load LLM from LLMProfileStore when configured - Falls back gracefully to agent.llm if profile loading fails - Update autotitle field description to reference the new option - Add comprehensive tests for the new functionality This addresses issue #2514 by enabling users to configure a cheap/fast model (e.g., Haiku) for title generation regardless of the agent's main model. Fixes #2514 Co-authored-by: openhands --- .../agent_server/conversation_service.py | 26 +++++- .../openhands/agent_server/models.py | 12 ++- .../agent_server/test_conversation_service.py | 93 ++++++++++++++++++- 3 files changed, 128 insertions(+), 3 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/conversation_service.py b/openhands-agent-server/openhands/agent_server/conversation_service.py index 37b715191a..cd8c1d43dd 100644 --- a/openhands-agent-server/openhands/agent_server/conversation_service.py +++ b/openhands-agent-server/openhands/agent_server/conversation_service.py @@ -788,7 +788,8 @@ async def __call__(self, event: Event) -> None: async def _generate_and_save() -> None: try: - title = await self.service.generate_title() + title_llm = self._load_title_llm() + title = await self.service.generate_title(llm=title_llm) if title and self.service.stored.title is None: self.service.stored.title = title self.service.stored.updated_at = utc_now() @@ -802,6 +803,29 @@ async def _generate_and_save() -> None: asyncio.create_task(_generate_and_save()) + def _load_title_llm(self) -> LLM | None: + """Load 
the LLM for title generation from profile store. + + Returns: + LLM instance if title_llm_profile is configured, None otherwise. + If profile loading fails, returns None to fall back to agent.llm. + """ + profile_name = self.service.stored.title_llm_profile + if not profile_name: + return None + + try: + from openhands.sdk.llm.llm_profile_store import LLMProfileStore + + profile_store = LLMProfileStore() + return profile_store.load(profile_name) + except (FileNotFoundError, ValueError) as e: + logger.warning( + f"Failed to load title LLM profile '{profile_name}': {e}. " + "Falling back to agent's LLM." + ) + return None + @dataclass class WebhookSubscriber(Subscriber): diff --git a/openhands-agent-server/openhands/agent_server/models.py b/openhands-agent-server/openhands/agent_server/models.py index 906fc95087..2c95fad211 100644 --- a/openhands-agent-server/openhands/agent_server/models.py +++ b/openhands-agent-server/openhands/agent_server/models.py @@ -146,7 +146,17 @@ class _StartConversationRequestBase(BaseModel): default=True, description=( "If true, automatically generate a title for the conversation from " - "the first user message using the conversation's LLM." + "the first user message. Uses the LLM configured via title_llm_profile " + "if set, otherwise falls back to the agent's LLM." + ), + ) + title_llm_profile: str | None = Field( + default=None, + description=( + "Optional LLM profile name for title generation. If set, the LLM " + "is loaded from LLMProfileStore (~/.openhands/profiles/) instead of " + "using the agent's LLM. This enables using a fast/cheap model for " + "title generation regardless of the agent's main model." 
), ) diff --git a/tests/agent_server/test_conversation_service.py b/tests/agent_server/test_conversation_service.py index 92ae9e7a32..1171571323 100644 --- a/tests/agent_server/test_conversation_service.py +++ b/tests/agent_server/test_conversation_service.py @@ -1549,7 +1549,9 @@ def test_safe_rmtree_readonly_file_handling(self): class TestAutoTitle: """Tests for AutoTitleSubscriber.""" - def _make_service(self, title: str | None = None) -> AsyncMock: + def _make_service( + self, title: str | None = None, title_llm_profile: str | None = None + ) -> AsyncMock: stored = StoredConversation( id=uuid4(), agent=Agent(llm=LLM(model="gpt-4o", usage_id="test-llm"), tools=[]), @@ -1558,6 +1560,7 @@ def _make_service(self, title: str | None = None) -> AsyncMock: initial_message=None, metrics=None, title=title, + title_llm_profile=title_llm_profile, ) service = AsyncMock(spec=EventService) service.stored = stored @@ -1632,3 +1635,91 @@ async def test_autotitle_handles_generate_title_failure(self): # Title remains unset; save_meta was never called assert service.stored.title is None service.save_meta.assert_not_called() + + @pytest.mark.asyncio + async def test_autotitle_uses_llm_profile_when_configured(self): + """Title is generated using the LLM from the configured profile.""" + service = self._make_service(title_llm_profile="cheap-model") + service.generate_title.return_value = "✨ Profile LLM Title" + + # Create a mock LLM that will be returned by the profile store + mock_llm = LLM(model="gpt-3.5-turbo", usage_id="title-llm") + + with patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore: + mock_store_instance = MockStore.return_value + mock_store_instance.load.return_value = mock_llm + + subscriber = AutoTitleSubscriber(service=service) + await subscriber(self._user_message_event()) + await asyncio.sleep(0) + + # Verify the profile store was used to load the LLM + MockStore.assert_called_once_with() + 
mock_store_instance.load.assert_called_once_with("cheap-model") + + # Verify generate_title was called with the loaded LLM + service.generate_title.assert_called_once_with(llm=mock_llm) + + # Verify the title was saved + assert service.stored.title == "✨ Profile LLM Title" + service.save_meta.assert_called_once() + + @pytest.mark.asyncio + async def test_autotitle_falls_back_when_profile_not_found(self): + """Falls back to agent LLM when the configured profile is not found.""" + service = self._make_service(title_llm_profile="nonexistent-profile") + service.generate_title.return_value = "✨ Fallback Title" + + with patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore: + mock_store_instance = MockStore.return_value + mock_store_instance.load.side_effect = FileNotFoundError( + "Profile 'nonexistent-profile' not found" + ) + + subscriber = AutoTitleSubscriber(service=service) + await subscriber(self._user_message_event()) + await asyncio.sleep(0) + + # Verify generate_title was called with llm=None (to fall back to agent.llm) + service.generate_title.assert_called_once_with(llm=None) + + # Title should still be generated using the fallback + assert service.stored.title == "✨ Fallback Title" + service.save_meta.assert_called_once() + + @pytest.mark.asyncio + async def test_autotitle_no_profile_calls_without_llm(self): + """When no title_llm_profile, generate_title called without llm arg.""" + service = self._make_service(title_llm_profile=None) + service.generate_title.return_value = "✨ Agent LLM Title" + + subscriber = AutoTitleSubscriber(service=service) + await subscriber(self._user_message_event()) + await asyncio.sleep(0) + + # Verify generate_title was called with llm=None (to use agent.llm) + service.generate_title.assert_called_once_with(llm=None) + + # Title should be generated + assert service.stored.title == "✨ Agent LLM Title" + service.save_meta.assert_called_once() + + @pytest.mark.asyncio + async def 
test_autotitle_handles_profile_load_value_error(self): + """Falls back gracefully when profile loading fails with ValueError.""" + service = self._make_service(title_llm_profile="corrupted-profile") + service.generate_title.return_value = "✨ Fallback Title" + + with patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore: + mock_store_instance = MockStore.return_value + mock_store_instance.load.side_effect = ValueError("Invalid profile format") + + subscriber = AutoTitleSubscriber(service=service) + await subscriber(self._user_message_event()) + await asyncio.sleep(0) + + # Verify generate_title was called with llm=None (to fall back to agent.llm) + service.generate_title.assert_called_once_with(llm=None) + + # Title should still be generated using the fallback + assert service.stored.title == "✨ Fallback Title" From a3ca14e1db733f652588c5818ede1c74f3b6f3ef Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 19 Mar 2026 17:32:15 +0000 Subject: [PATCH 2/4] refactor: fully decouple title generation from agent.llm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the fallback to agent.llm in LocalConversation.generate_title(). Title generation now requires an explicit LLM parameter - if not provided, it falls back directly to simple message truncation. This makes title generation completely independent of the agent's LLM, which addresses the case where agent.llm may not be callable (e.g., sentinel LLM in certain configurations). Fallback chain is now: 1. Configured profile LLM → LLM-based title generation 2. 
No profile/LLM provided → truncation fallback directly Updated tests to reflect the new behavior: - test_generate_title_without_llm_uses_truncation: Tests truncation fallback - Updated tests to pass explicit LLM when testing LLM-based title generation Co-authored-by: openhands --- .../conversation/impl/local_conversation.py | 16 ++--- tests/sdk/conversation/test_generate_title.py | 63 +++++++++++++------ 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 68b0e4997a..0922d0b5ed 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -904,9 +904,13 @@ def ask_agent(self, question: str) -> str: def generate_title(self, llm: LLM | None = None, max_length: int = 50) -> str: """Generate a title for the conversation based on the first user message. + Title generation is decoupled from the agent's LLM. If an LLM is provided, + it will be used to generate a descriptive title. If no LLM is provided, + the title is generated by truncating the first user message. + Args: llm: Optional LLM to use for title generation. If not provided, - uses self.agent.llm. + falls back to simple message truncation. max_length: Maximum length of the generated title. Returns: @@ -915,16 +919,8 @@ def generate_title(self, llm: LLM | None = None, max_length: int = 50) -> str: Raises: ValueError: If no user messages are found in the conversation. 
""" - # Use provided LLM or fall back to agent's LLM - llm_to_use = llm or self.agent.llm - - # Skip LLM-based title generation for ACP agents with sentinel LLM - # The sentinel model "acp-managed" cannot make LLM calls directly - if llm_to_use.model == "acp-managed": - llm_to_use = None - return generate_conversation_title( - events=self._state.events, llm=llm_to_use, max_length=max_length + events=self._state.events, llm=llm, max_length=max_length ) def condense(self) -> None: diff --git a/tests/sdk/conversation/test_generate_title.py b/tests/sdk/conversation/test_generate_title.py index 8f6628322b..a9f555c01a 100644 --- a/tests/sdk/conversation/test_generate_title.py +++ b/tests/sdk/conversation/test_generate_title.py @@ -66,9 +66,12 @@ def create_mock_llm_response(content: str) -> LLMResponse: ) -@patch("openhands.sdk.llm.llm.LLM.completion") -def test_generate_title_basic(mock_completion): - """Test basic generate_title functionality.""" +def test_generate_title_without_llm_uses_truncation(): + """Test generate_title falls back to truncation when no LLM is provided. + + Title generation is decoupled from agent.llm - calling generate_title() + without an explicit LLM parameter falls back to simple message truncation. 
+ """ agent = create_test_agent() conv = Conversation(agent=agent, visualizer=None) @@ -76,16 +79,11 @@ def test_generate_title_basic(mock_completion): user_message = create_user_message_event("Help me create a Python script") conv.state.events.append(user_message) - # Mock the LLM response - mock_response = create_mock_llm_response("Create Python Script") - mock_completion.return_value = mock_response - - # Generate title + # Generate title without providing an LLM - falls back to truncation title = conv.generate_title() - # Verify the title was generated - assert title == "Create Python Script" - mock_completion.assert_called_once() + # Verify the title is the truncated message (no LLM call made) + assert title == "Help me create a Python script" def test_generate_title_no_user_messages(): @@ -112,19 +110,40 @@ def test_generate_title_llm_error_fallback(mock_completion): user_message = create_user_message_event("Fix the bug in my application") conv.state.events.append(user_message) + # Create an LLM to pass explicitly + custom_llm = LLM(model="gpt-4o-mini", api_key=SecretStr("key"), usage_id="err") + # Mock the LLM to raise an exception mock_completion.side_effect = Exception("LLM error") - # Generate title (should fall back to truncation) - title = conv.generate_title() + # Generate title with explicit LLM (should fall back to truncation on error) + title = conv.generate_title(llm=custom_llm) # Verify fallback title was generated assert title == "Fix the bug in my application" +def test_generate_title_truncation_respects_max_length(): + """Test generate_title truncation respects max_length parameter.""" + agent = create_test_agent() + conv = Conversation(agent=agent, visualizer=None) + + # Add a user message that is longer than max_length + long_message = "Create a web application with advanced features and database" + user_message = create_user_message_event(long_message) + conv.state.events.append(user_message) + + # Generate title with max_length=20 (no LLM, 
so falls back to truncation) + title = conv.generate_title(max_length=20) + + # Verify the title was truncated + assert len(title) <= 20 + assert title.endswith("...") + + @patch("openhands.sdk.llm.llm.LLM.completion") -def test_generate_title_with_max_length(mock_completion): - """Test generate_title respects max_length parameter.""" +def test_generate_title_with_llm_truncates_long_response(mock_completion): + """Test generate_title truncates long LLM responses to max_length.""" agent = create_test_agent() conv = Conversation(agent=agent, visualizer=None) @@ -132,14 +151,17 @@ def test_generate_title_with_max_length(mock_completion): user_message = create_user_message_event("Create a web application") conv.state.events.append(user_message) + # Create an LLM to pass explicitly + custom_llm = LLM(model="gpt-4o-mini", api_key=SecretStr("key"), usage_id="test") + # Mock the LLM response with a long title mock_response = create_mock_llm_response( "Create a Complex Web Application with Database" ) mock_completion.return_value = mock_response - # Generate title with max_length=20 - title = conv.generate_title(max_length=20) + # Generate title with max_length=20 and explicit LLM + title = conv.generate_title(llm=custom_llm, max_length=20) # Verify the title was truncated assert len(title) <= 20 @@ -182,13 +204,16 @@ def test_generate_title_empty_llm_response_fallback(mock_completion): user_message = create_user_message_event("Help with testing") conv.state.events.append(user_message) + # Create an LLM to pass explicitly + custom_llm = LLM(model="gpt-4o-mini", api_key=SecretStr("key"), usage_id="empty") + # Mock the LLM response with empty content mock_response = MagicMock() mock_response.choices = [] mock_completion.return_value = mock_response - # Generate title (should fall back to truncation) - title = conv.generate_title() + # Generate title with explicit LLM (falls back to truncation on empty response) + title = conv.generate_title(llm=custom_llm) # Verify fallback 
title was generated assert title == "Help with testing" From 3de41d132060181147618c6dfe3df20ead4f8080 Mon Sep 17 00:00:00 2001 From: Debug Agent Date: Sat, 18 Apr 2026 15:50:29 -0300 Subject: [PATCH 3/4] fix: make title_llm_profile non-breaking for existing consumers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restore agent.llm fallback so title generation precedence is now: title_llm_profile (if set and loads) → agent.llm → message truncation. Previously this PR fully decoupled title generation from agent.llm, which silently changed existing consumers' auto-titles from "LLM-generated" to "truncated first message." title_llm_profile is now a pure opt-in enhancement — consumers that don't configure it keep getting LLM-generated titles from the agent's LLM. - LocalConversation.generate_title(): fall back to self.agent.llm when no explicit LLM is passed - AutoTitleSubscriber: if title_llm_profile is set and loads, use it; otherwise fall back to conversation.agent.llm; otherwise truncation - Update tests to match the non-breaking precedence --- .../agent_server/conversation_service.py | 13 +++++-- .../conversation/impl/local_conversation.py | 13 ++++--- .../openhands/sdk/conversation/request.py | 8 ++-- .../agent_server/test_conversation_service.py | 39 ++++++++++--------- tests/sdk/conversation/test_generate_title.py | 27 +++++++------ 5 files changed, 56 insertions(+), 44 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/conversation_service.py b/openhands-agent-server/openhands/agent_server/conversation_service.py index 85ea2b5d83..067e98502d 100644 --- a/openhands-agent-server/openhands/agent_server/conversation_service.py +++ b/openhands-agent-server/openhands/agent_server/conversation_service.py @@ -827,7 +827,13 @@ async def __call__(self, event: Event) -> None: if not message_text: return + # Precedence: title_llm_profile (if configured and loads) → agent.llm → + # truncation. 
This keeps auto-titling non-breaking for consumers who + # don't configure title_llm_profile. title_llm = self._load_title_llm() + if title_llm is None: + conversation = self.service._conversation + title_llm = conversation.agent.llm if conversation else None async def _generate_and_save() -> None: try: @@ -857,9 +863,8 @@ def _load_title_llm(self) -> LLM | None: Returns: LLM instance if title_llm_profile is configured and loads - successfully, None otherwise. When None is returned, title - generation falls back to simple message truncation (title - generation is decoupled from the agent's LLM). + successfully, None otherwise. When None is returned, the caller + falls back to the agent's LLM (and then to message truncation). """ profile_name = self.service.stored.title_llm_profile if not profile_name: @@ -873,7 +878,7 @@ def _load_title_llm(self) -> LLM | None: except (FileNotFoundError, ValueError) as e: logger.warning( f"Failed to load title LLM profile '{profile_name}': {e}. " - "Falling back to message truncation." + "Falling back to the agent's LLM." ) return None diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index b82f2ed3a4..a2b6b3438e 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -931,13 +931,13 @@ def ask_agent(self, question: str) -> str: def generate_title(self, llm: LLM | None = None, max_length: int = 50) -> str: """Generate a title for the conversation based on the first user message. - Title generation is decoupled from the agent's LLM. If an LLM is provided, - it will be used to generate a descriptive title. If no LLM is provided, - the title is generated by truncating the first user message. + If an explicit LLM is provided, it takes precedence. Otherwise the + agent's LLM is used. 
If neither is available, the title falls back to + simple message truncation. Args: - llm: Optional LLM to use for title generation. If not provided, - falls back to simple message truncation. + llm: Optional LLM to use for title generation. Takes precedence + over the agent's LLM when provided. max_length: Maximum length of the generated title. Returns: @@ -946,8 +946,9 @@ def generate_title(self, llm: LLM | None = None, max_length: int = 50) -> str: Raises: ValueError: If no user messages are found in the conversation. """ + effective_llm = llm if llm is not None else self.agent.llm return generate_conversation_title( - events=self._state.events, llm=llm, max_length=max_length + events=self._state.events, llm=effective_llm, max_length=max_length ) def condense(self) -> None: diff --git a/openhands-sdk/openhands/sdk/conversation/request.py b/openhands-sdk/openhands/sdk/conversation/request.py index b84345cf97..f8d7641834 100644 --- a/openhands-sdk/openhands/sdk/conversation/request.py +++ b/openhands-sdk/openhands/sdk/conversation/request.py @@ -146,9 +146,8 @@ class _StartConversationRequestBase(BaseModel): default=True, description=( "If true, automatically generate a title for the conversation from " - "the first user message. Uses the LLM configured via " - "title_llm_profile if set; otherwise falls back to simple message " - "truncation." + "the first user message. Precedence: title_llm_profile (if set and " + "loads) → agent.llm → message truncation." ), ) title_llm_profile: str | None = Field( @@ -158,7 +157,8 @@ class _StartConversationRequestBase(BaseModel): "is loaded from LLMProfileStore (~/.openhands/profiles/) and used " "for LLM-based title generation. This enables using a fast/cheap " "model for titles regardless of the agent's main model. If not " - "set, title generation falls back to simple message truncation." + "set (or profile loading fails), title generation falls back to " + "the agent's LLM." 
), ) diff --git a/tests/agent_server/test_conversation_service.py b/tests/agent_server/test_conversation_service.py index 00ac701868..a6caabb851 100644 --- a/tests/agent_server/test_conversation_service.py +++ b/tests/agent_server/test_conversation_service.py @@ -1770,7 +1770,7 @@ async def test_autotitle_skips_empty_message(self): @pytest.mark.asyncio async def test_autotitle_uses_llm_profile_when_configured(self): - """Title generation uses the LLM loaded from the configured profile.""" + """Profile LLM takes precedence over agent.llm when configured.""" service = self._make_service(title_llm_profile="cheap-model") mock_llm = LLM(model="gpt-3.5-turbo", usage_id="title-llm") @@ -1789,7 +1789,7 @@ async def test_autotitle_uses_llm_profile_when_configured(self): MockStore.assert_called_once_with() mock_store_instance.load.assert_called_once_with("cheap-model") - # generate_title_from_message is invoked with the profile-loaded LLM + # Profile-loaded LLM wins over agent.llm assert mock_generate_title.called assert mock_generate_title.call_args.args[1] is mock_llm @@ -1797,14 +1797,15 @@ async def test_autotitle_uses_llm_profile_when_configured(self): service.save_meta.assert_called_once() @pytest.mark.asyncio - async def test_autotitle_falls_back_to_truncation_when_profile_not_found(self): - """Missing profile → title_llm is None → truncation fallback (no agent.llm).""" + async def test_autotitle_falls_back_to_agent_llm_when_profile_not_found(self): + """Missing profile → fall back to agent.llm (non-breaking behavior).""" service = self._make_service(title_llm_profile="nonexistent-profile") + agent_llm = service._conversation.agent.llm with ( patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore, patch( - self._GENERATE_TITLE_PATH, return_value="Fix the login bug" + self._GENERATE_TITLE_PATH, return_value="✨ Agent LLM Title" ) as mock_generate_title, ): mock_store_instance = MockStore.return_value @@ -1816,41 +1817,43 @@ async def 
test_autotitle_falls_back_to_truncation_when_profile_not_found(self): await subscriber(self._user_message_event()) await asyncio.sleep(0) - # Failed profile load → title_llm is None (no fallback to agent.llm) + # Failed profile load → falls back to agent.llm assert mock_generate_title.called - assert mock_generate_title.call_args.args[1] is None + assert mock_generate_title.call_args.args[1] is agent_llm - assert service.stored.title == "Fix the login bug" + assert service.stored.title == "✨ Agent LLM Title" service.save_meta.assert_called_once() @pytest.mark.asyncio - async def test_autotitle_no_profile_calls_without_llm(self): - """No profile → generate_title_from_message is called with llm=None.""" + async def test_autotitle_no_profile_uses_agent_llm(self): + """No profile configured → use agent.llm (preserves existing behavior).""" service = self._make_service(title_llm_profile=None) + agent_llm = service._conversation.agent.llm with patch( - self._GENERATE_TITLE_PATH, return_value="Fix the login bug" + self._GENERATE_TITLE_PATH, return_value="✨ Agent LLM Title" ) as mock_generate_title: subscriber = AutoTitleSubscriber(service=service) await subscriber(self._user_message_event()) await asyncio.sleep(0) - # Decoupled from agent.llm — no profile means llm=None + # No profile → agent.llm is used (backwards compatible) assert mock_generate_title.called - assert mock_generate_title.call_args.args[1] is None + assert mock_generate_title.call_args.args[1] is agent_llm - assert service.stored.title == "Fix the login bug" + assert service.stored.title == "✨ Agent LLM Title" service.save_meta.assert_called_once() @pytest.mark.asyncio async def test_autotitle_handles_profile_load_value_error(self): - """Profile load ValueError → title_llm is None, fallback to truncation.""" + """Profile load ValueError → fall back to agent.llm.""" service = self._make_service(title_llm_profile="corrupted-profile") + agent_llm = service._conversation.agent.llm with ( 
patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore, patch( - self._GENERATE_TITLE_PATH, return_value="Fix the login bug" + self._GENERATE_TITLE_PATH, return_value="✨ Agent LLM Title" ) as mock_generate_title, ): mock_store_instance = MockStore.return_value @@ -1861,9 +1864,9 @@ async def test_autotitle_handles_profile_load_value_error(self): await asyncio.sleep(0) assert mock_generate_title.called - assert mock_generate_title.call_args.args[1] is None + assert mock_generate_title.call_args.args[1] is agent_llm - assert service.stored.title == "Fix the login bug" + assert service.stored.title == "✨ Agent LLM Title" service.save_meta.assert_called_once() @pytest.mark.asyncio diff --git a/tests/sdk/conversation/test_generate_title.py b/tests/sdk/conversation/test_generate_title.py index a9f555c01a..00b65e8a59 100644 --- a/tests/sdk/conversation/test_generate_title.py +++ b/tests/sdk/conversation/test_generate_title.py @@ -66,24 +66,25 @@ def create_mock_llm_response(content: str) -> LLMResponse: ) -def test_generate_title_without_llm_uses_truncation(): - """Test generate_title falls back to truncation when no LLM is provided. +@patch("openhands.sdk.llm.llm.LLM.completion") +def test_generate_title_without_llm_uses_agent_llm(mock_completion): + """Without an explicit LLM, generate_title falls back to the agent's LLM. - Title generation is decoupled from agent.llm - calling generate_title() - without an explicit LLM parameter falls back to simple message truncation. + This preserves backwards-compatible behavior for callers that don't + configure a dedicated title LLM. 
""" agent = create_test_agent() conv = Conversation(agent=agent, visualizer=None) - # Add a user message to the conversation user_message = create_user_message_event("Help me create a Python script") conv.state.events.append(user_message) - # Generate title without providing an LLM - falls back to truncation + mock_completion.return_value = create_mock_llm_response("Create Python Script") + title = conv.generate_title() - # Verify the title is the truncated message (no LLM call made) - assert title == "Help me create a Python script" + assert title == "Create Python Script" + mock_completion.assert_called_once() def test_generate_title_no_user_messages(): @@ -123,8 +124,9 @@ def test_generate_title_llm_error_fallback(mock_completion): assert title == "Fix the bug in my application" -def test_generate_title_truncation_respects_max_length(): - """Test generate_title truncation respects max_length parameter.""" +@patch("openhands.sdk.llm.llm.LLM.completion") +def test_generate_title_truncation_respects_max_length(mock_completion): + """When LLM fails, truncation fallback respects max_length.""" agent = create_test_agent() conv = Conversation(agent=agent, visualizer=None) @@ -133,10 +135,11 @@ def test_generate_title_truncation_respects_max_length(): user_message = create_user_message_event(long_message) conv.state.events.append(user_message) - # Generate title with max_length=20 (no LLM, so falls back to truncation) + # Force LLM failure to exercise the truncation fallback path + mock_completion.side_effect = Exception("LLM error") + title = conv.generate_title(max_length=20) - # Verify the title was truncated assert len(title) <= 20 assert title.endswith("...") From 15a0a5c83dc0b520cc705359b474aa9a5453b3db Mon Sep 17 00:00:00 2001 From: Debug Agent Date: Sat, 18 Apr 2026 16:09:14 -0300 Subject: [PATCH 4/4] test: add title_llm_profile integration test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an end-to-end test that 
exercises the real wiring from AutoTitleSubscriber through LLMProfileStore to LLM.completion: - Persists a real LLM profile to disk via LLMProfileStore - Points the default profile dir at a tmp path so the subscriber's LLMProfileStore() (no args) picks up the on-disk profile - Patches only LLM.completion (the network boundary) and captures the caller's usage_id to prove the profile LLM — not agent.llm — was used The existing unit tests mock generate_title_from_message directly, so a regression in profile loading or subscriber wiring would slip through. This integration test closes that gap. Also adds _drain_title_task helper that polls for the background executor task to complete, since AutoTitleSubscriber uses run_in_executor and a single await asyncio.sleep(0) is not enough. Updates the 4 title_llm_profile tests + the acp-managed test to use it, making them deterministic. --- .../agent_server/test_conversation_service.py | 114 +++++++++++++++++- 1 file changed, 109 insertions(+), 5 deletions(-) diff --git a/tests/agent_server/test_conversation_service.py b/tests/agent_server/test_conversation_service.py index a6caabb851..9b8d28a032 100644 --- a/tests/agent_server/test_conversation_service.py +++ b/tests/agent_server/test_conversation_service.py @@ -1688,6 +1688,22 @@ def _user_message_event(self, text: str = "Fix the login bug") -> MessageEvent: llm_message=Message(role="user", content=[TextContent(text=text)]), ) + @staticmethod + async def _drain_title_task( + predicate=lambda: True, max_iterations: int = 50, step: float = 0.02 + ) -> None: + """Yield to the event loop until the background title task completes. + + `AutoTitleSubscriber` schedules generation via `run_in_executor`, so a + single `await asyncio.sleep(0)` is not enough to let the executor + thread finish. Poll with a short sleep until `predicate()` becomes + truthy or the timeout elapses. 
+ """ + for _ in range(max_iterations): + await asyncio.sleep(step) + if predicate(): + return + @pytest.mark.asyncio async def test_autotitle_sets_title_on_first_user_message(self): """Title is generated and saved when the first user message arrives.""" @@ -1785,7 +1801,7 @@ async def test_autotitle_uses_llm_profile_when_configured(self): subscriber = AutoTitleSubscriber(service=service) await subscriber(self._user_message_event()) - await asyncio.sleep(0) + await self._drain_title_task(lambda: service.stored.title is not None) MockStore.assert_called_once_with() mock_store_instance.load.assert_called_once_with("cheap-model") @@ -1815,7 +1831,7 @@ async def test_autotitle_falls_back_to_agent_llm_when_profile_not_found(self): subscriber = AutoTitleSubscriber(service=service) await subscriber(self._user_message_event()) - await asyncio.sleep(0) + await self._drain_title_task(lambda: service.stored.title is not None) # Failed profile load → falls back to agent.llm assert mock_generate_title.called @@ -1835,7 +1851,7 @@ async def test_autotitle_no_profile_uses_agent_llm(self): ) as mock_generate_title: subscriber = AutoTitleSubscriber(service=service) await subscriber(self._user_message_event()) - await asyncio.sleep(0) + await self._drain_title_task(lambda: service.stored.title is not None) # No profile → agent.llm is used (backwards compatible) assert mock_generate_title.called @@ -1861,7 +1877,7 @@ async def test_autotitle_handles_profile_load_value_error(self): subscriber = AutoTitleSubscriber(service=service) await subscriber(self._user_message_event()) - await asyncio.sleep(0) + await self._drain_title_task(lambda: service.stored.title is not None) assert mock_generate_title.called assert mock_generate_title.call_args.args[1] is agent_llm @@ -1876,11 +1892,99 @@ async def test_autotitle_falls_back_for_acp_managed_llm(self): subscriber = AutoTitleSubscriber(service=service) await subscriber(self._user_message_event("Fix the login bug")) - await asyncio.sleep(0) + 
await self._drain_title_task(lambda: service.stored.title is not None) assert service.stored.title == "Fix the login bug" service.save_meta.assert_called_once()
+    @pytest.mark.asyncio
+    async def test_autotitle_integration_routes_through_profile_store(self, tmp_path):
+        """End-to-end: profile on disk → LLMProfileStore.load → title LLM call.
+
+        Exercises the real wiring from AutoTitleSubscriber through LLMProfileStore
+        to LLM.completion. Only the network boundary (LLM.completion) is mocked,
+        so this catches regressions in profile loading, LLM passthrough, and the
+        agent-server → SDK integration — the unit tests above only exercise
+        AutoTitleSubscriber in isolation.
+        """
+        from litellm.types.utils import (
+            Choices,
+            Message as LiteLLMMessage,
+            ModelResponse,
+            Usage,
+        )
+
+        from openhands.sdk.llm import LLMResponse, MetricsSnapshot
+        from openhands.sdk.llm.llm_profile_store import LLMProfileStore
+
+        # Persist a real LLM profile to disk with a distinctive usage_id so we
+        # can tell the title LLM apart from the agent's LLM in the assertion.
+        profile_dir = tmp_path / "profiles"
+        title_llm_on_disk = LLM(
+            usage_id="title-llm",
+            model="claude-haiku-4-5",
+            api_key=SecretStr("title-key"),
+        )
+        LLMProfileStore(base_dir=profile_dir).save(
+            "title-fast", title_llm_on_disk, include_secrets=True
+        )
+
+        service = self._make_service(title_llm_profile="title-fast")
+
+        calls: list[str] = []
+
+        def fake_completion(self_llm, _messages, **_kwargs):
+            calls.append(self_llm.usage_id)
+            msg = LiteLLMMessage(content="✨ Generated", role="assistant")
+            choice = Choices(finish_reason="stop", index=0, message=msg)
+            raw = ModelResponse(
+                id="resp-1",
+                choices=[choice],
+                created=0,
+                model=self_llm.model,
+                object="chat.completion",
+                usage=Usage(prompt_tokens=1, completion_tokens=1, total_tokens=2),
+            )
+            return LLMResponse(
+                message=Message.from_llm_chat_message(choice["message"]),
+                metrics=MetricsSnapshot(
+                    model_name=self_llm.model,
+                    accumulated_cost=0.0,
+                    max_budget_per_task=None,
+                    accumulated_token_usage=None,
+                ),
+                raw_response=raw,
+            )
+
+        # Point LLMProfileStore() (no args) at our tmp dir so the real
+        # _load_title_llm code path finds our on-disk profile.
+        with (
+            patch(
+                "openhands.sdk.llm.llm_profile_store._DEFAULT_PROFILE_DIR", profile_dir
+            ),
+            patch(
+                "openhands.sdk.llm.llm.LLM.completion",
+                autospec=True,
+                side_effect=fake_completion,
+            ),
+        ):
+            subscriber = AutoTitleSubscriber(service=service)
+            await subscriber(self._user_message_event("Fix the login bug"))
+            # Wait for the background executor task to complete. The production
+            # code uses run_in_executor, so sleep(0) is not enough. Poll via the
+            # shared _drain_title_task helper, as the sibling autotitle tests
+            # do, instead of hand-rolling a second polling loop here; the
+            # assertions below still fail loudly if the title never lands.
+            await self._drain_title_task(lambda: service.stored.title is not None)
+
+        # The profile's LLM (usage_id="title-llm") was called — not agent.llm
+        # (usage_id="test-llm"). This is the regression-sensitive assertion.
+        assert calls == ["title-llm"], (
+            f"Expected only the title profile LLM to be called, got: {calls}"
+        )
+        assert service.stored.title == "✨ Generated"
+        service.save_meta.assert_called_once()
+
class TestACPActivityHeartbeatWiring: """Tests for _setup_acp_activity_heartbeat in EventService."""