1 | 1 | # coding=utf-8
| 2 | +import warnings |
| 3 | +from typing import List, Dict, Optional, Any, Iterator, cast, Type |
2 | 4 |
3 | | -from typing import List, Dict, Optional, Any, Iterator, cast |
4 | | - |
| 5 | +import openai |
| 6 | +from langchain_core.callbacks import CallbackManagerForLLMRun |
5 | 7 | from langchain_core.language_models import LanguageModelInput |
6 | | -from langchain_core.messages import BaseMessage, get_buffer_string |
| 8 | +from langchain_core.messages import BaseMessage, get_buffer_string, BaseMessageChunk, AIMessageChunk |
7 | 9 | from langchain_core.outputs import ChatGenerationChunk, ChatGeneration |
8 | 10 | from langchain_core.runnables import RunnableConfig, ensure_config |
| 11 | +from langchain_core.utils.pydantic import is_basemodel_subclass |
9 | 12 | from langchain_openai import ChatOpenAI |
| 13 | +from langchain_openai.chat_models.base import _convert_chunk_to_generation_chunk |
10 | 14 |
11 | 15 | from common.config.tokenizer_manage_config import TokenizerManage |
12 | 16 |
@@ -36,14 +40,81 @@ def get_num_tokens(self, text: str) -> int: |
36 | 40 | return self.get_last_generation_info().get('output_tokens', 0) |
37 | 41 |
38 | 42 | def _stream( |
39 | | - self, *args: Any, stream_usage: Optional[bool] = None, **kwargs: Any |
| 43 | + self, |
| 44 | + messages: List[BaseMessage], |
| 45 | + stop: Optional[List[str]] = None, |
| 46 | + run_manager: Optional[CallbackManagerForLLMRun] = None, |
| 47 | + **kwargs: Any, |
40 | 48 | ) -> Iterator[ChatGenerationChunk]: |
| 49 | + |
| 50 | +        # Set default stream_options.
| 51 | +        stream_usage = self._should_stream_usage(kwargs.pop('stream_usage', None), **kwargs)
| 52 | + # Note: stream_options is not a valid parameter for Azure OpenAI. |
| 53 | + # To support users proxying Azure through ChatOpenAI, here we only specify |
| 54 | + # stream_options if include_usage is set to True. |
| 55 | + # See https://learn.microsoft.com/en-us/azure/ai-services/openai/whats-new |
| 56 | + # for release notes. |
| 57 | + if stream_usage: |
| 58 | + kwargs["stream_options"] = {"include_usage": stream_usage} |
| 59 | + |
41 | 60 | kwargs["stream"] = True |
42 | | - kwargs["stream_options"] = {"include_usage": True} |
43 | | - for chunk in super()._stream(*args, stream_usage=stream_usage, **kwargs): |
44 | | - if chunk.message.usage_metadata is not None: |
45 | | - self.usage_metadata = chunk.message.usage_metadata |
46 | | - yield chunk |
| 61 | + payload = self._get_request_payload(messages, stop=stop, **kwargs) |
| 62 | + default_chunk_class: Type[BaseMessageChunk] = AIMessageChunk |
| 63 | + base_generation_info = {} |
| 64 | + |
| 65 | + if "response_format" in payload and is_basemodel_subclass( |
| 66 | + payload["response_format"] |
| 67 | + ): |
| 68 | + # TODO: Add support for streaming with Pydantic response_format. |
| 69 | + warnings.warn("Streaming with Pydantic response_format not yet supported.") |
| 70 | + chat_result = self._generate( |
| 71 | + messages, stop, run_manager=run_manager, **kwargs |
| 72 | + ) |
| 73 | + msg = chat_result.generations[0].message |
| 74 | + yield ChatGenerationChunk( |
| 75 | + message=AIMessageChunk( |
| 76 | + **msg.dict(exclude={"type", "additional_kwargs"}), |
| 77 | + # preserve the "parsed" Pydantic object without converting to dict |
| 78 | + additional_kwargs=msg.additional_kwargs, |
| 79 | + ), |
| 80 | + generation_info=chat_result.generations[0].generation_info, |
| 81 | + ) |
| 82 | + return |
| 83 | + if self.include_response_headers: |
| 84 | + raw_response = self.client.with_raw_response.create(**payload) |
| 85 | + response = raw_response.parse() |
| 86 | + base_generation_info = {"headers": dict(raw_response.headers)} |
| 87 | + else: |
| 88 | + response = self.client.create(**payload) |
| 89 | + with response: |
| 90 | + is_first_chunk = True |
| 91 | + for chunk in response: |
| 92 | + if not isinstance(chunk, dict): |
| 93 | + chunk = chunk.model_dump() |
| 94 | + |
| 95 | + generation_chunk = _convert_chunk_to_generation_chunk( |
| 96 | + chunk, |
| 97 | + default_chunk_class, |
| 98 | + base_generation_info if is_first_chunk else {}, |
| 99 | + ) |
| 100 | + if generation_chunk is None: |
| 101 | + continue |
| 102 | + |
| 103 | +                # custom code: cache the latest usage metadata so token counts can be read after streaming
| 104 | +                if generation_chunk.message.usage_metadata is not None:
| 105 | +                    self.usage_metadata = generation_chunk.message.usage_metadata
| 106 | +                # custom code: expose reasoning_content (if the provider sends it) via additional_kwargs
| 107 | +                if chunk['choices'] and chunk['choices'][0].get('delta', {}).get('reasoning_content'):
| 108 | +                    generation_chunk.message.additional_kwargs["reasoning_content"] = chunk['choices'][0]['delta']['reasoning_content']
| 109 | + |
| 110 | + default_chunk_class = generation_chunk.message.__class__ |
| 111 | + logprobs = (generation_chunk.generation_info or {}).get("logprobs") |
| 112 | + if run_manager: |
| 113 | + run_manager.on_llm_new_token( |
| 114 | + generation_chunk.text, chunk=generation_chunk, logprobs=logprobs |
| 115 | + ) |
| 116 | + is_first_chunk = False |
| 117 | + yield generation_chunk |
47 | 118 |
48 | 119 | def invoke( |
49 | 120 | self, |
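For reference, a minimal consumption sketch of what the `_stream` override above adds: streamed chunks may carry `reasoning_content` in `additional_kwargs`, and the final usage chunk is cached on the model instance as `usage_metadata`. The helper name `stream_with_reasoning` and the way `llm` is obtained are illustrative assumptions, not part of this change; only the `additional_kwargs["reasoning_content"]` and `usage_metadata` behaviour comes from the diff.

```python
# Hypothetical usage sketch (not part of this commit): `llm` is assumed to be an
# instance of the customized ChatOpenAI subclass defined in this file.
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import HumanMessage


def stream_with_reasoning(llm: BaseChatModel, prompt: str) -> str:
    """Stream a reply, printing any provider-supplied reasoning_content as it arrives."""
    answer_parts = []
    for chunk in llm.stream([HumanMessage(content=prompt)]):
        # reasoning_content is only set for providers that emit it (see the custom code above)
        reasoning = chunk.additional_kwargs.get("reasoning_content")
        if reasoning:
            print(f"[reasoning] {reasoning}", end="", flush=True)
        if chunk.content:
            answer_parts.append(chunk.content)
    # usage_metadata is cached by the customized _stream from the final usage chunk
    usage = getattr(llm, "usage_metadata", None) or {}
    print("\noutput tokens:", usage.get("output_tokens", 0))
    return "".join(answer_parts)
```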