77import json
88from mimetypes import guess_file_type
99from pathlib import Path
10+ import re
1011from typing import TYPE_CHECKING , Any , Literal , cast
1112
1213import openai
2930 ResponseInputImageParam ,
3031 ResponseInputMessageContentListParam ,
3132 ResponseInputParam ,
33+ ResponseInputTextParam ,
3234 ResponseOutputItemAddedEvent ,
3335 ResponseOutputItemDoneEvent ,
3436 ResponseOutputMessage ,
7779 CONF_WEB_SEARCH_CITY ,
7880 CONF_WEB_SEARCH_CONTEXT_SIZE ,
7981 CONF_WEB_SEARCH_COUNTRY ,
82+ CONF_WEB_SEARCH_INLINE_CITATIONS ,
8083 CONF_WEB_SEARCH_REGION ,
8184 CONF_WEB_SEARCH_TIMEZONE ,
8285 CONF_WEB_SEARCH_USER_LOCATION ,
9093 RECOMMENDED_TOP_P ,
9194 RECOMMENDED_VERBOSITY ,
9295 RECOMMENDED_WEB_SEARCH_CONTEXT_SIZE ,
96+ RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS ,
9397)
9498
9599if TYPE_CHECKING :
@@ -251,13 +255,21 @@ def _convert_content_to_param(
251255async def _transform_stream ( # noqa: C901 - This is complex, but better to have it in one place
252256 chat_log : conversation .ChatLog ,
253257 stream : AsyncStream [ResponseStreamEvent ],
258+ remove_citations : bool = False ,
254259) -> AsyncGenerator [
255260 conversation .AssistantContentDeltaDict | conversation .ToolResultContentDeltaDict
256261]:
257262 """Transform an OpenAI delta stream into HA format."""
258263 last_summary_index = None
259264 last_role : Literal ["assistant" , "tool_result" ] | None = None
260265
266+ # Non-reasoning models don't follow our request to remove citations, so we remove
267+ # them manually here. Citations always follow the same pattern: a Markdown link
268+ # wrapped in parentheses that arrives within a single delta event, except that the
269+ # outer closing parenthesis is sometimes split off into the next delta event.
270+ remove_parentheses : bool = False
271+ citation_regexp = re .compile (r"\(\[([^\]]+)\]\((https?:\/\/[^\)]+)\)" )
272+
261273 async for event in stream :
262274 LOGGER .debug ("Received event: %s" , event )
263275
@@ -344,7 +356,23 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
344356 yield {"native" : event .item }
345357 last_summary_index = - 1 # Trigger new assistant message on next turn
346358 elif isinstance (event , ResponseTextDeltaEvent ):
347- yield {"content" : event .delta }
359+ data = event .delta
360+ if remove_parentheses :
361+ data = data .removeprefix (")" )
362+ remove_parentheses = False
363+ elif remove_citations and (match := citation_regexp .search (data )):
364+ match_start , match_end = match .span ()
365+ # remove leading space if any
366+ if data [match_start - 1 : match_start ] == " " :
367+ match_start -= 1
368+ # remove closing parenthesis:
369+ if data [match_end : match_end + 1 ] == ")" :
370+ match_end += 1
371+ else :
372+ remove_parentheses = True
373+ data = data [:match_start ] + data [match_end :]
374+ if data :
375+ yield {"content" : data }
348376 elif isinstance (event , ResponseReasoningSummaryTextDeltaEvent ):
349377 # OpenAI can output several reasoning summaries
350378 # in a single ResponseReasoningItem. We split them as separate
@@ -489,6 +517,7 @@ async def _async_handle_chat_log(
489517 for tool in chat_log .llm_api .tools
490518 ]
491519
520+ remove_citations = False
492521 if options .get (CONF_WEB_SEARCH ):
493522 web_search = WebSearchToolParam (
494523 type = "web_search" ,
@@ -504,6 +533,27 @@ async def _async_handle_chat_log(
504533 country = options .get (CONF_WEB_SEARCH_COUNTRY , "" ),
505534 timezone = options .get (CONF_WEB_SEARCH_TIMEZONE , "" ),
506535 )
536+ if not options .get (
537+ CONF_WEB_SEARCH_INLINE_CITATIONS ,
538+ RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS ,
539+ ):
540+ system_message = cast (EasyInputMessageParam , messages [0 ])
541+ content = system_message ["content" ]
542+ if isinstance (content , str ):
543+ system_message ["content" ] = [
544+ ResponseInputTextParam (type = "input_text" , text = content )
545+ ]
546+ system_message ["content" ].append ( # type: ignore[union-attr]
547+ ResponseInputTextParam (
548+ type = "input_text" ,
549+ text = "When doing a web search, do not include source citations" ,
550+ )
551+ )
552+
553+ if "reasoning" not in model_args :
554+ # Reasoning models handle this correctly with just a prompt
555+ remove_citations = True
556+
507557 tools .append (web_search )
508558
509559 if options .get (CONF_CODE_INTERPRETER ):
@@ -573,7 +623,8 @@ async def _async_handle_chat_log(
573623 [
574624 content
575625 async for content in chat_log .async_add_delta_content_stream (
576- self .entity_id , _transform_stream (chat_log , stream )
626+ self .entity_id ,
627+ _transform_stream (chat_log , stream , remove_citations ),
577628 )
578629 ]
579630 )
0 commit comments