|
17 | 17 | from .._thinking_part import split_content_into_text_and_thinking |
18 | 18 | from .._utils import guard_tool_call_id as _guard_tool_call_id, now_utc as _now_utc, number_to_datetime |
19 | 19 | from ..builtin_tools import CodeExecutionTool, WebSearchTool |
20 | | -from ..exceptions import UserError |
| 20 | +from ..exceptions import StreamCancelled, UserError |
21 | 21 | from ..messages import ( |
22 | 22 | AudioUrl, |
23 | 23 | BinaryContent, |
@@ -222,6 +222,17 @@ class OpenAIResponsesModelSettings(OpenAIChatModelSettings, total=False): |
222 | 222 | `medium`, and `high`. |
223 | 223 | """ |
224 | 224 |
|
| 225 | + openai_previous_response_id: Literal['auto'] | str |
| 226 | + """The ID of a previous response from the model to use as the starting point for a continued conversation. |
| 227 | +
|
| 228 | + When set to `'auto'`, the request automatically uses the most recent |
| 229 | + `provider_response_id` from the message history and omits earlier messages. |
| 230 | +
|
| 231 | + This enables the model to use server-side conversation state and faithfully reference previous reasoning. |
| 232 | + See the [OpenAI Responses API documentation](https://platform.openai.com/docs/guides/reasoning#keeping-reasoning-items-in-context) |
| 233 | + for more information. |
| 234 | + """ |
| 235 | + |
225 | 236 |
|
226 | 237 | @dataclass(init=False) |
227 | 238 | class OpenAIChatModel(Model): |
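As a rough usage sketch (not taken from this PR), the new setting could be enabled via agent-level model settings; the model name, result attributes, and import paths below are illustrative and may vary between pydantic-ai versions.

```python
# Hypothetical sketch: let pydantic-ai reuse OpenAI's server-side conversation
# state by picking the latest provider_response_id from the message history.
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings

model = OpenAIResponsesModel('gpt-4o')  # any Responses-API-capable model name
settings = OpenAIResponsesModelSettings(openai_previous_response_id='auto')
agent = Agent(model, model_settings=settings)

first = agent.run_sync('Outline a three-step refactor of my parser.')

# On the follow-up run only the messages after the last OpenAI response are sent;
# earlier context is referenced server-side via previous_response_id.
followup = agent.run_sync(
    'Expand step two in more detail.',
    message_history=first.all_messages(),
)
print(followup.output)
```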
@@ -977,6 +988,10 @@ async def _responses_create( |
977 | 988 | else: |
978 | 989 | tool_choice = 'auto' |
979 | 990 |
|
| 991 | + previous_response_id = model_settings.get('openai_previous_response_id') |
| 992 | + if previous_response_id == 'auto': |
| 993 | + previous_response_id, messages = self._get_previous_response_id_and_new_messages(messages) |
| 994 | + |
980 | 995 | instructions, openai_messages = await self._map_messages(messages, model_settings) |
981 | 996 | reasoning = self._get_reasoning(model_settings) |
982 | 997 |
|
@@ -1027,6 +1042,7 @@ async def _responses_create( |
1027 | 1042 | truncation=model_settings.get('openai_truncation', NOT_GIVEN), |
1028 | 1043 | timeout=model_settings.get('timeout', NOT_GIVEN), |
1029 | 1044 | service_tier=model_settings.get('openai_service_tier', NOT_GIVEN), |
| 1045 | + previous_response_id=previous_response_id, |
1030 | 1046 | reasoning=reasoning, |
1031 | 1047 | user=model_settings.get('openai_user', NOT_GIVEN), |
1032 | 1048 | text=text or NOT_GIVEN, |
@@ -1092,6 +1108,28 @@ def _map_tool_definition(self, f: ToolDefinition) -> responses.FunctionToolParam |
1092 | 1108 | ), |
1093 | 1109 | } |
1094 | 1110 |
|
| 1111 | + def _get_previous_response_id_and_new_messages( |
| 1112 | + self, messages: list[ModelMessage] |
| 1113 | + ) -> tuple[str | None, list[ModelMessage]]: |
| 1114 | + # When `openai_previous_response_id` is set to 'auto', the most recent |
| 1115 | + # `provider_response_id` from the message history is selected and all |
| 1116 | + # earlier messages are omitted, so the OpenAI Responses API can reuse its |
| 1117 | + # server-side conversation state. Returns the `previous_response_id` and the |
| 1118 | + # trimmed messages, or `(None, messages)` if no prior response id (or no newer message) is found. |
| 1119 | + previous_response_id = None |
| 1120 | + trimmed_messages: list[ModelMessage] = [] |
| 1121 | + for m in reversed(messages): |
| 1122 | + if isinstance(m, ModelResponse) and m.provider_name == self.system: |
| 1123 | + previous_response_id = m.provider_response_id |
| 1124 | + break |
| 1125 | + else: |
| 1126 | + trimmed_messages.append(m) |
| 1127 | + |
| 1128 | + if previous_response_id and trimmed_messages: |
| 1129 | + return previous_response_id, list(reversed(trimmed_messages)) |
| 1130 | + else: |
| 1131 | + return None, messages |
| 1132 | + |
1095 | 1133 | async def _map_messages( # noqa: C901 |
1096 | 1134 | self, messages: list[ModelMessage], model_settings: OpenAIResponsesModelSettings |
1097 | 1135 | ) -> tuple[str | NotGiven, list[responses.ResponseInputItemParam]]: |
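To illustrate the trimming behaviour of the helper above, here is a toy, made-up history; the message classes come from `pydantic_ai.messages`, and the provider values are hypothetical.

```python
# Illustrative only: what _get_previous_response_id_and_new_messages would do
# to a short history when openai_previous_response_id='auto'.
from pydantic_ai.messages import ModelRequest, ModelResponse, TextPart, UserPromptPart

history = [
    ModelRequest(parts=[UserPromptPart(content='hello')]),
    ModelResponse(
        parts=[TextPart(content='hi!')],
        provider_name='openai',           # assumed to match self.system here
        provider_response_id='resp_123',  # made-up Responses API id
    ),
    ModelRequest(parts=[UserPromptPart(content='and again?')]),
]

# Expected outcome: previous_response_id == 'resp_123' and only the final
# ModelRequest is sent; everything at or before the matched ModelResponse is
# dropped because OpenAI already holds that context server-side. If no matching
# ModelResponse (or no newer message) exists, the full history is sent instead.
```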
@@ -1309,9 +1347,14 @@ class OpenAIStreamedResponse(StreamedResponse): |
1309 | 1347 | _response: AsyncIterable[ChatCompletionChunk] |
1310 | 1348 | _timestamp: datetime |
1311 | 1349 | _provider_name: str |
| 1350 | + _cancelled: bool = field(default=False, init=False) |
1312 | 1351 |
|
1313 | 1352 | async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]: |
1314 | 1353 | async for chunk in self._response: |
| 1354 | + # Check for cancellation before processing each chunk |
| 1355 | + if self._cancelled: |
| 1356 | + raise StreamCancelled('OpenAI stream was cancelled') |
| 1357 | + |
1315 | 1358 | self._usage += _map_usage(chunk) |
1316 | 1359 |
|
1317 | 1360 | if chunk.id and self.provider_response_id is None: |
@@ -1380,6 +1423,14 @@ def timestamp(self) -> datetime: |
1380 | 1423 | """Get the timestamp of the response.""" |
1381 | 1424 | return self._timestamp |
1382 | 1425 |
|
| 1426 | + async def cancel(self) -> None: |
| 1427 | + """Cancel the streaming response. |
| 1428 | +
|
| 1429 | + This marks the stream as cancelled, which causes the iterator to raise |
| 1430 | + a `StreamCancelled` exception when the next chunk is processed. |
| 1431 | + """ |
| 1432 | + self._cancelled = True |
| 1433 | + |
1383 | 1434 |
|
1384 | 1435 | @dataclass |
1385 | 1436 | class OpenAIResponsesStreamedResponse(StreamedResponse): |
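A hedged sketch of driving the new cancellation hook from a watchdog task follows; `streamed_response` stands in for the `OpenAIStreamedResponse` created during streaming, and the `StreamCancelled` import path is an assumption that mirrors the relative `..exceptions` import added above.

```python
# Hypothetical sketch: cancel an in-flight OpenAI stream after a timeout.
import asyncio

from pydantic_ai.exceptions import StreamCancelled  # assumed path, mirrors `..exceptions` above


async def consume(streamed_response) -> None:
    try:
        async for _event in streamed_response:  # StreamedResponse yields ModelResponseStreamEvents
            ...  # handle partial text / tool-call deltas here
    except StreamCancelled:
        print('stream was cancelled before completion')


async def watchdog(streamed_response, timeout: float) -> None:
    await asyncio.sleep(timeout)
    # Sets the _cancelled flag; the iterator raises StreamCancelled when the
    # next chunk arrives.
    await streamed_response.cancel()


async def run_both(streamed_response) -> None:
    await asyncio.gather(consume(streamed_response), watchdog(streamed_response, 5.0))
```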