Skip to content

Commit b7fb932

Browse files
simonrosenberg, openhands-agent, and Debug Agent
authored
feat: add title_llm_profile support for auto-titling (#2515)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Debug Agent <debug@example.com>
1 parent 5273c5d commit b7fb932

File tree

5 files changed

+309
-34
lines changed

5 files changed

+309
-34
lines changed

openhands-agent-server/openhands/agent_server/conversation_service.py

Lines changed: 32 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -827,8 +827,13 @@ async def __call__(self, event: Event) -> None:
827827
if not message_text:
828828
return
829829

830-
conversation = self.service._conversation
831-
llm = conversation.agent.llm if conversation else None
830+
# Precedence: title_llm_profile (if configured and loads) → agent.llm →
831+
# truncation. This keeps auto-titling non-breaking for consumers who
832+
# don't configure title_llm_profile.
833+
title_llm = self._load_title_llm()
834+
if title_llm is None:
835+
conversation = self.service._conversation
836+
title_llm = conversation.agent.llm if conversation else None
832837

833838
async def _generate_and_save() -> None:
834839
try:
@@ -837,7 +842,7 @@ async def _generate_and_save() -> None:
837842
None,
838843
generate_title_from_message,
839844
message_text,
840-
llm,
845+
title_llm,
841846
50,
842847
)
843848
if title and self.service.stored.title is None:
@@ -853,6 +858,30 @@ async def _generate_and_save() -> None:
853858

854859
asyncio.create_task(_generate_and_save())
855860

861+
def _load_title_llm(self) -> LLM | None:
862+
"""Load the LLM for title generation from profile store.
863+
864+
Returns:
865+
LLM instance if title_llm_profile is configured and loads
866+
successfully, None otherwise. When None is returned, the caller
867+
falls back to the agent's LLM (and then to message truncation).
868+
"""
869+
profile_name = self.service.stored.title_llm_profile
870+
if not profile_name:
871+
return None
872+
873+
try:
874+
from openhands.sdk.llm.llm_profile_store import LLMProfileStore
875+
876+
profile_store = LLMProfileStore()
877+
return profile_store.load(profile_name)
878+
except (FileNotFoundError, ValueError) as e:
879+
logger.warning(
880+
f"Failed to load title LLM profile '{profile_name}': {e}. "
881+
"Falling back to the agent's LLM."
882+
)
883+
return None
884+
856885

857886
@dataclass
858887
class WebhookSubscriber(Subscriber):

openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -931,9 +931,13 @@ def ask_agent(self, question: str) -> str:
931931
def generate_title(self, llm: LLM | None = None, max_length: int = 50) -> str:
932932
"""Generate a title for the conversation based on the first user message.
933933
934+
If an explicit LLM is provided, it takes precedence. Otherwise the
935+
agent's LLM is used. If neither is available, the title falls back to
936+
simple message truncation.
937+
934938
Args:
935-
llm: Optional LLM to use for title generation. If not provided,
936-
uses self.agent.llm.
939+
llm: Optional LLM to use for title generation. Takes precedence
940+
over the agent's LLM when provided.
937941
max_length: Maximum length of the generated title.
938942
939943
Returns:
@@ -942,16 +946,9 @@ def generate_title(self, llm: LLM | None = None, max_length: int = 50) -> str:
942946
Raises:
943947
ValueError: If no user messages are found in the conversation.
944948
"""
945-
# Use provided LLM or fall back to agent's LLM
946-
llm_to_use = llm or self.agent.llm
947-
948-
# Skip LLM-based title generation for ACP agents with sentinel LLM
949-
# The sentinel model "acp-managed" cannot make LLM calls directly
950-
if llm_to_use.model == "acp-managed":
951-
llm_to_use = None
952-
949+
effective_llm = llm if llm is not None else self.agent.llm
953950
return generate_conversation_title(
954-
events=self._state.events, llm=llm_to_use, max_length=max_length
951+
events=self._state.events, llm=effective_llm, max_length=max_length
955952
)
956953

957954
def condense(self) -> None:

openhands-sdk/openhands/sdk/conversation/request.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,19 @@ class _StartConversationRequestBase(BaseModel):
146146
default=True,
147147
description=(
148148
"If true, automatically generate a title for the conversation from "
149-
"the first user message using the conversation's LLM."
149+
"the first user message. Precedence: title_llm_profile (if set and "
150+
"loads) → agent.llm → message truncation."
151+
),
152+
)
153+
title_llm_profile: str | None = Field(
154+
default=None,
155+
description=(
156+
"Optional LLM profile name for title generation. If set, the LLM "
157+
"is loaded from LLMProfileStore (~/.openhands/profiles/) and used "
158+
"for LLM-based title generation. This enables using a fast/cheap "
159+
"model for titles regardless of the agent's main model. If not "
160+
"set (or profile loading fails), title generation falls back to "
161+
"the agent's LLM."
150162
),
151163
)
152164

tests/agent_server/test_conversation_service.py

Lines changed: 212 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1656,7 +1656,10 @@ class TestAutoTitle:
16561656
)
16571657

16581658
def _make_service(
1659-
self, title: str | None = None, llm_model: str = "gpt-4o"
1659+
self,
1660+
title: str | None = None,
1661+
title_llm_profile: str | None = None,
1662+
llm_model: str = "gpt-4o",
16601663
) -> AsyncMock:
16611664
stored = StoredConversation(
16621665
id=uuid4(),
@@ -1666,6 +1669,7 @@ def _make_service(
16661669
initial_message=None,
16671670
metrics=None,
16681671
title=title,
1672+
title_llm_profile=title_llm_profile,
16691673
)
16701674
service = AsyncMock(spec=EventService)
16711675
service.stored = stored
@@ -1684,6 +1688,22 @@ def _user_message_event(self, text: str = "Fix the login bug") -> MessageEvent:
16841688
llm_message=Message(role="user", content=[TextContent(text=text)]),
16851689
)
16861690

1691+
@staticmethod
1692+
async def _drain_title_task(
1693+
predicate=lambda: True, max_iterations: int = 50, step: float = 0.02
1694+
) -> None:
1695+
"""Yield to the event loop until the background title task completes.
1696+
1697+
`AutoTitleSubscriber` schedules generation via `run_in_executor`, so a
1698+
single `await asyncio.sleep(0)` is not enough to let the executor
1699+
thread finish. Poll with a short sleep until `predicate()` becomes
1700+
truthy or the timeout elapses.
1701+
"""
1702+
for _ in range(max_iterations):
1703+
await asyncio.sleep(step)
1704+
if predicate():
1705+
return
1706+
16871707
@pytest.mark.asyncio
16881708
async def test_autotitle_sets_title_on_first_user_message(self):
16891709
"""Title is generated and saved when the first user message arrives."""
@@ -1764,18 +1784,207 @@ async def test_autotitle_skips_empty_message(self):
17641784

17651785
assert service.stored.title is None
17661786

1787+
@pytest.mark.asyncio
1788+
async def test_autotitle_uses_llm_profile_when_configured(self):
1789+
"""Profile LLM takes precedence over agent.llm when configured."""
1790+
service = self._make_service(title_llm_profile="cheap-model")
1791+
mock_llm = LLM(model="gpt-3.5-turbo", usage_id="title-llm")
1792+
1793+
with (
1794+
patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore,
1795+
patch(
1796+
self._GENERATE_TITLE_PATH, return_value="✨ Profile LLM Title"
1797+
) as mock_generate_title,
1798+
):
1799+
mock_store_instance = MockStore.return_value
1800+
mock_store_instance.load.return_value = mock_llm
1801+
1802+
subscriber = AutoTitleSubscriber(service=service)
1803+
await subscriber(self._user_message_event())
1804+
await self._drain_title_task(lambda: service.stored.title is not None)
1805+
1806+
MockStore.assert_called_once_with()
1807+
mock_store_instance.load.assert_called_once_with("cheap-model")
1808+
# Profile-loaded LLM wins over agent.llm
1809+
assert mock_generate_title.called
1810+
assert mock_generate_title.call_args.args[1] is mock_llm
1811+
1812+
assert service.stored.title == "✨ Profile LLM Title"
1813+
service.save_meta.assert_called_once()
1814+
1815+
@pytest.mark.asyncio
1816+
async def test_autotitle_falls_back_to_agent_llm_when_profile_not_found(self):
1817+
"""Missing profile → fall back to agent.llm (non-breaking behavior)."""
1818+
service = self._make_service(title_llm_profile="nonexistent-profile")
1819+
agent_llm = service._conversation.agent.llm
1820+
1821+
with (
1822+
patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore,
1823+
patch(
1824+
self._GENERATE_TITLE_PATH, return_value="✨ Agent LLM Title"
1825+
) as mock_generate_title,
1826+
):
1827+
mock_store_instance = MockStore.return_value
1828+
mock_store_instance.load.side_effect = FileNotFoundError(
1829+
"Profile 'nonexistent-profile' not found"
1830+
)
1831+
1832+
subscriber = AutoTitleSubscriber(service=service)
1833+
await subscriber(self._user_message_event())
1834+
await self._drain_title_task(lambda: service.stored.title is not None)
1835+
1836+
# Failed profile load → falls back to agent.llm
1837+
assert mock_generate_title.called
1838+
assert mock_generate_title.call_args.args[1] is agent_llm
1839+
1840+
assert service.stored.title == "✨ Agent LLM Title"
1841+
service.save_meta.assert_called_once()
1842+
1843+
@pytest.mark.asyncio
1844+
async def test_autotitle_no_profile_uses_agent_llm(self):
1845+
"""No profile configured → use agent.llm (preserves existing behavior)."""
1846+
service = self._make_service(title_llm_profile=None)
1847+
agent_llm = service._conversation.agent.llm
1848+
1849+
with patch(
1850+
self._GENERATE_TITLE_PATH, return_value="✨ Agent LLM Title"
1851+
) as mock_generate_title:
1852+
subscriber = AutoTitleSubscriber(service=service)
1853+
await subscriber(self._user_message_event())
1854+
await self._drain_title_task(lambda: service.stored.title is not None)
1855+
1856+
# No profile → agent.llm is used (backwards compatible)
1857+
assert mock_generate_title.called
1858+
assert mock_generate_title.call_args.args[1] is agent_llm
1859+
1860+
assert service.stored.title == "✨ Agent LLM Title"
1861+
service.save_meta.assert_called_once()
1862+
1863+
@pytest.mark.asyncio
1864+
async def test_autotitle_handles_profile_load_value_error(self):
1865+
"""Profile load ValueError → fall back to agent.llm."""
1866+
service = self._make_service(title_llm_profile="corrupted-profile")
1867+
agent_llm = service._conversation.agent.llm
1868+
1869+
with (
1870+
patch("openhands.sdk.llm.llm_profile_store.LLMProfileStore") as MockStore,
1871+
patch(
1872+
self._GENERATE_TITLE_PATH, return_value="✨ Agent LLM Title"
1873+
) as mock_generate_title,
1874+
):
1875+
mock_store_instance = MockStore.return_value
1876+
mock_store_instance.load.side_effect = ValueError("Invalid profile format")
1877+
1878+
subscriber = AutoTitleSubscriber(service=service)
1879+
await subscriber(self._user_message_event())
1880+
await self._drain_title_task(lambda: service.stored.title is not None)
1881+
1882+
assert mock_generate_title.called
1883+
assert mock_generate_title.call_args.args[1] is agent_llm
1884+
1885+
assert service.stored.title == "✨ Agent LLM Title"
1886+
service.save_meta.assert_called_once()
1887+
17671888
@pytest.mark.asyncio
17681889
async def test_autotitle_falls_back_for_acp_managed_llm(self):
1769-
"""ACP-managed agents should skip LLM title generation and fall back."""
1890+
"""ACP-managed agents with no title profile → truncation fallback."""
17701891
service = self._make_service(llm_model="acp-managed")
17711892
subscriber = AutoTitleSubscriber(service=service)
17721893

17731894
await subscriber(self._user_message_event("Fix the login bug"))
1774-
await asyncio.sleep(0)
1895+
await self._drain_title_task(lambda: service.stored.title is not None)
17751896

17761897
assert service.stored.title == "Fix the login bug"
17771898
service.save_meta.assert_called_once()
17781899

1900+
@pytest.mark.asyncio
1901+
async def test_autotitle_integration_routes_through_profile_store(self, tmp_path):
1902+
"""End-to-end: profile on disk → LLMProfileStore.load → title LLM call.
1903+
1904+
Exercises the real wiring from AutoTitleSubscriber through LLMProfileStore
1905+
to LLM.completion. Only the network boundary (LLM.completion) is mocked,
1906+
so this catches regressions in profile loading, LLM passthrough, and the
1907+
agent-server → SDK integration — the unit tests above only exercise
1908+
AutoTitleSubscriber in isolation.
1909+
"""
1910+
from litellm.types.utils import (
1911+
Choices,
1912+
Message as LiteLLMMessage,
1913+
ModelResponse,
1914+
Usage,
1915+
)
1916+
1917+
from openhands.sdk.llm import LLMResponse, MetricsSnapshot
1918+
from openhands.sdk.llm.llm_profile_store import LLMProfileStore
1919+
1920+
# Persist a real LLM profile to disk with a distinctive usage_id so we
1921+
# can tell the title LLM apart from the agent's LLM in the assertion.
1922+
profile_dir = tmp_path / "profiles"
1923+
title_llm_on_disk = LLM(
1924+
usage_id="title-llm",
1925+
model="claude-haiku-4-5",
1926+
api_key=SecretStr("title-key"),
1927+
)
1928+
LLMProfileStore(base_dir=profile_dir).save(
1929+
"title-fast", title_llm_on_disk, include_secrets=True
1930+
)
1931+
1932+
service = self._make_service(title_llm_profile="title-fast")
1933+
1934+
calls: list[str] = []
1935+
1936+
def fake_completion(self_llm, _messages, **_kwargs):
1937+
calls.append(self_llm.usage_id)
1938+
msg = LiteLLMMessage(content="✨ Generated", role="assistant")
1939+
choice = Choices(finish_reason="stop", index=0, message=msg)
1940+
raw = ModelResponse(
1941+
id="resp-1",
1942+
choices=[choice],
1943+
created=0,
1944+
model=self_llm.model,
1945+
object="chat.completion",
1946+
usage=Usage(prompt_tokens=1, completion_tokens=1, total_tokens=2),
1947+
)
1948+
return LLMResponse(
1949+
message=Message.from_llm_chat_message(choice["message"]),
1950+
metrics=MetricsSnapshot(
1951+
model_name=self_llm.model,
1952+
accumulated_cost=0.0,
1953+
max_budget_per_task=None,
1954+
accumulated_token_usage=None,
1955+
),
1956+
raw_response=raw,
1957+
)
1958+
1959+
# Point LLMProfileStore() (no args) at our tmp dir so the real
1960+
# _load_title_llm code path finds our on-disk profile.
1961+
with (
1962+
patch(
1963+
"openhands.sdk.llm.llm_profile_store._DEFAULT_PROFILE_DIR", profile_dir
1964+
),
1965+
patch(
1966+
"openhands.sdk.llm.llm.LLM.completion",
1967+
autospec=True,
1968+
side_effect=fake_completion,
1969+
),
1970+
):
1971+
subscriber = AutoTitleSubscriber(service=service)
1972+
await subscriber(self._user_message_event("Fix the login bug"))
1973+
# Wait for the background executor task to complete. The production
1974+
# code uses run_in_executor, so sleep(0) is not enough.
1975+
for _ in range(50):
1976+
await asyncio.sleep(0.02)
1977+
if service.stored.title is not None:
1978+
break
1979+
1980+
# The profile's LLM (usage_id="title-llm") was called — not agent.llm
1981+
# (usage_id="test-llm"). This is the regression-sensitive assertion.
1982+
assert calls == ["title-llm"], (
1983+
f"Expected only the title profile LLM to be called, got: {calls}"
1984+
)
1985+
assert service.stored.title == "✨ Generated"
1986+
service.save_meta.assert_called_once()
1987+
17791988

17801989
class TestACPActivityHeartbeatWiring:
17811990
"""Tests for _setup_acp_activity_heartbeat in EventService."""

0 commit comments

Comments (0)