Skip to content

Commit 8d50754

Browse files
Shulyaka and balloob authored
Update OpenAI suggested prompt to not include citations (home-assistant#154292)
Co-authored-by: Paulus Schoutsen <[email protected]>
Co-authored-by: Paulus Schoutsen <[email protected]>
1 parent 6ee71da commit 8d50754

File tree

7 files changed

+133
-7
lines changed

7 files changed

+133
-7
lines changed

homeassistant/components/openai_conversation/config_flow.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,7 @@
5555
CONF_WEB_SEARCH_CITY,
5656
CONF_WEB_SEARCH_CONTEXT_SIZE,
5757
CONF_WEB_SEARCH_COUNTRY,
58+
CONF_WEB_SEARCH_INLINE_CITATIONS,
5859
CONF_WEB_SEARCH_REGION,
5960
CONF_WEB_SEARCH_TIMEZONE,
6061
CONF_WEB_SEARCH_USER_LOCATION,
@@ -73,6 +74,7 @@
7374
RECOMMENDED_VERBOSITY,
7475
RECOMMENDED_WEB_SEARCH,
7576
RECOMMENDED_WEB_SEARCH_CONTEXT_SIZE,
77+
RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS,
7678
RECOMMENDED_WEB_SEARCH_USER_LOCATION,
7779
UNSUPPORTED_IMAGE_MODELS,
7880
UNSUPPORTED_MODELS,
@@ -396,6 +398,10 @@ async def async_step_model(
396398
CONF_WEB_SEARCH_USER_LOCATION,
397399
default=RECOMMENDED_WEB_SEARCH_USER_LOCATION,
398400
): bool,
401+
vol.Optional(
402+
CONF_WEB_SEARCH_INLINE_CITATIONS,
403+
default=RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS,
404+
): bool,
399405
}
400406
)
401407
elif CONF_WEB_SEARCH in options:
@@ -411,6 +417,7 @@ async def async_step_model(
411417
CONF_WEB_SEARCH_REGION,
412418
CONF_WEB_SEARCH_COUNTRY,
413419
CONF_WEB_SEARCH_TIMEZONE,
420+
CONF_WEB_SEARCH_INLINE_CITATIONS,
414421
)
415422
}
416423

homeassistant/components/openai_conversation/const.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
CONF_WEB_SEARCH_REGION = "region"
3131
CONF_WEB_SEARCH_COUNTRY = "country"
3232
CONF_WEB_SEARCH_TIMEZONE = "timezone"
33+
CONF_WEB_SEARCH_INLINE_CITATIONS = "inline_citations"
3334
RECOMMENDED_CODE_INTERPRETER = False
3435
RECOMMENDED_CHAT_MODEL = "gpt-4o-mini"
3536
RECOMMENDED_IMAGE_MODEL = "gpt-image-1"
@@ -41,6 +42,7 @@
4142
RECOMMENDED_WEB_SEARCH = False
4243
RECOMMENDED_WEB_SEARCH_CONTEXT_SIZE = "medium"
4344
RECOMMENDED_WEB_SEARCH_USER_LOCATION = False
45+
RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS = False
4446

4547
UNSUPPORTED_MODELS: list[str] = [
4648
"o1-mini",

homeassistant/components/openai_conversation/entity.py

Lines changed: 53 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
import json
88
from mimetypes import guess_file_type
99
from pathlib import Path
10+
import re
1011
from typing import TYPE_CHECKING, Any, Literal, cast
1112

1213
import openai
@@ -29,6 +30,7 @@
2930
ResponseInputImageParam,
3031
ResponseInputMessageContentListParam,
3132
ResponseInputParam,
33+
ResponseInputTextParam,
3234
ResponseOutputItemAddedEvent,
3335
ResponseOutputItemDoneEvent,
3436
ResponseOutputMessage,
@@ -77,6 +79,7 @@
7779
CONF_WEB_SEARCH_CITY,
7880
CONF_WEB_SEARCH_CONTEXT_SIZE,
7981
CONF_WEB_SEARCH_COUNTRY,
82+
CONF_WEB_SEARCH_INLINE_CITATIONS,
8083
CONF_WEB_SEARCH_REGION,
8184
CONF_WEB_SEARCH_TIMEZONE,
8285
CONF_WEB_SEARCH_USER_LOCATION,
@@ -90,6 +93,7 @@
9093
RECOMMENDED_TOP_P,
9194
RECOMMENDED_VERBOSITY,
9295
RECOMMENDED_WEB_SEARCH_CONTEXT_SIZE,
96+
RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS,
9397
)
9498

9599
if TYPE_CHECKING:
@@ -251,13 +255,21 @@ def _convert_content_to_param(
251255
async def _transform_stream( # noqa: C901 - This is complex, but better to have it in one place
252256
chat_log: conversation.ChatLog,
253257
stream: AsyncStream[ResponseStreamEvent],
258+
remove_citations: bool = False,
254259
) -> AsyncGenerator[
255260
conversation.AssistantContentDeltaDict | conversation.ToolResultContentDeltaDict
256261
]:
257262
"""Transform an OpenAI delta stream into HA format."""
258263
last_summary_index = None
259264
last_role: Literal["assistant", "tool_result"] | None = None
260265

266+
# Non-reasoning models don't follow our request to remove citations, so we remove
267+
# them manually here. They always follow the same pattern: the citation is always
268+
# in parentheses in Markdown format, the citation is always in a single delta event,
269+
# and sometimes the closing parenthesis is split into a separate delta event.
270+
remove_parentheses: bool = False
271+
citation_regexp = re.compile(r"\(\[([^\]]+)\]\((https?:\/\/[^\)]+)\)")
272+
261273
async for event in stream:
262274
LOGGER.debug("Received event: %s", event)
263275

@@ -344,7 +356,23 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
344356
yield {"native": event.item}
345357
last_summary_index = -1 # Trigger new assistant message on next turn
346358
elif isinstance(event, ResponseTextDeltaEvent):
347-
yield {"content": event.delta}
359+
data = event.delta
360+
if remove_parentheses:
361+
data = data.removeprefix(")")
362+
remove_parentheses = False
363+
elif remove_citations and (match := citation_regexp.search(data)):
364+
match_start, match_end = match.span()
365+
# remove leading space if any
366+
if data[match_start - 1 : match_start] == " ":
367+
match_start -= 1
368+
# remove closing parenthesis:
369+
if data[match_end : match_end + 1] == ")":
370+
match_end += 1
371+
else:
372+
remove_parentheses = True
373+
data = data[:match_start] + data[match_end:]
374+
if data:
375+
yield {"content": data}
348376
elif isinstance(event, ResponseReasoningSummaryTextDeltaEvent):
349377
# OpenAI can output several reasoning summaries
350378
# in a single ResponseReasoningItem. We split them as separate
@@ -489,6 +517,7 @@ async def _async_handle_chat_log(
489517
for tool in chat_log.llm_api.tools
490518
]
491519

520+
remove_citations = False
492521
if options.get(CONF_WEB_SEARCH):
493522
web_search = WebSearchToolParam(
494523
type="web_search",
@@ -504,6 +533,27 @@ async def _async_handle_chat_log(
504533
country=options.get(CONF_WEB_SEARCH_COUNTRY, ""),
505534
timezone=options.get(CONF_WEB_SEARCH_TIMEZONE, ""),
506535
)
536+
if not options.get(
537+
CONF_WEB_SEARCH_INLINE_CITATIONS,
538+
RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS,
539+
):
540+
system_message = cast(EasyInputMessageParam, messages[0])
541+
content = system_message["content"]
542+
if isinstance(content, str):
543+
system_message["content"] = [
544+
ResponseInputTextParam(type="input_text", text=content)
545+
]
546+
system_message["content"].append( # type: ignore[union-attr]
547+
ResponseInputTextParam(
548+
type="input_text",
549+
text="When doing a web search, do not include source citations",
550+
)
551+
)
552+
553+
if "reasoning" not in model_args:
554+
# Reasoning models handle this correctly with just a prompt
555+
remove_citations = True
556+
507557
tools.append(web_search)
508558

509559
if options.get(CONF_CODE_INTERPRETER):
@@ -573,7 +623,8 @@ async def _async_handle_chat_log(
573623
[
574624
content
575625
async for content in chat_log.async_add_delta_content_stream(
576-
self.entity_id, _transform_stream(chat_log, stream)
626+
self.entity_id,
627+
_transform_stream(chat_log, stream, remove_citations),
577628
)
578629
]
579630
)

homeassistant/components/openai_conversation/strings.json

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@
5151
"data": {
5252
"code_interpreter": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::code_interpreter%]",
5353
"image_model": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::image_model%]",
54+
"inline_citations": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::inline_citations%]",
5455
"reasoning_effort": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::reasoning_effort%]",
5556
"search_context_size": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::search_context_size%]",
5657
"user_location": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::user_location%]",
@@ -59,6 +60,7 @@
5960
"data_description": {
6061
"code_interpreter": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::code_interpreter%]",
6162
"image_model": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::image_model%]",
63+
"inline_citations": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::inline_citations%]",
6264
"reasoning_effort": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::reasoning_effort%]",
6365
"search_context_size": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::search_context_size%]",
6466
"user_location": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::user_location%]",
@@ -74,7 +76,6 @@
7476
"reconfigure_successful": "[%key:common::config_flow::abort::reconfigure_successful%]"
7577
},
7678
"entry_type": "Conversation agent",
77-
7879
"error": {
7980
"model_not_supported": "This model is not supported, please select a different model",
8081
"web_search_minimal_reasoning": "Web search is currently not supported with minimal reasoning effort"
@@ -108,6 +109,7 @@
108109
"data": {
109110
"code_interpreter": "Enable code interpreter tool",
110111
"image_model": "Image generation model",
112+
"inline_citations": "Include links in web search results",
111113
"reasoning_effort": "Reasoning effort",
112114
"search_context_size": "Search context size",
113115
"user_location": "Include home location",
@@ -116,6 +118,7 @@
116118
"data_description": {
117119
"code_interpreter": "This tool, also known as the python tool to the model, allows it to run code to answer questions",
118120
"image_model": "The model to use when generating images",
121+
"inline_citations": "If disabled, additional prompt is added to ask the model to not include source citations",
119122
"reasoning_effort": "How many reasoning tokens the model should generate before creating a response to the prompt",
120123
"search_context_size": "High level guidance for the amount of context window space to use for the search",
121124
"user_location": "Refine search results based on geography",

tests/components/openai_conversation/snapshots/test_conversation.ambr

Lines changed: 35 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -207,7 +207,7 @@
207207
}),
208208
])
209209
# ---
210-
# name: test_web_search
210+
# name: test_web_search[False]
211211
list([
212212
dict({
213213
'content': "What's on the latest news?",
@@ -224,7 +224,40 @@
224224
'type': 'web_search_call',
225225
}),
226226
dict({
227-
'content': 'Home Assistant now supports ChatGPT Search in Assist',
227+
'content': 'Home Assistant now supports ChatGPT Search in Assist.',
228+
'role': 'assistant',
229+
'type': 'message',
230+
}),
231+
dict({
232+
'content': 'Thank you!',
233+
'role': 'user',
234+
'type': 'message',
235+
}),
236+
dict({
237+
'content': 'You are welcome!',
238+
'role': 'assistant',
239+
'type': 'message',
240+
}),
241+
])
242+
# ---
243+
# name: test_web_search[True]
244+
list([
245+
dict({
246+
'content': "What's on the latest news?",
247+
'role': 'user',
248+
'type': 'message',
249+
}),
250+
dict({
251+
'action': dict({
252+
'query': 'query',
253+
'type': 'search',
254+
}),
255+
'id': 'ws_A',
256+
'status': 'completed',
257+
'type': 'web_search_call',
258+
}),
259+
dict({
260+
'content': 'Home Assistant now supports ChatGPT Search in Assist ([release notes](https://www.home-assistant.io/blog/categories/release-notes/)).',
228261
'role': 'assistant',
229262
'type': 'message',
230263
}),

0 commit comments

Comments
 (0)