From 3ea0d8eafd8e478f639e4e97f554bd00509d124e Mon Sep 17 00:00:00 2001
From: ks6088ts
Date: Fri, 5 Sep 2025 17:47:06 +0900
Subject: [PATCH] add responses api model

---
 docs/references.md                       | 11 +++++-
 scripts/azure_openai_operator.py         | 47 ++++++++++++++++++++++++
 template_langgraph/llms/azure_openais.py | 29 +++++++++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/docs/references.md b/docs/references.md
index 09c6be7..60bf95b 100644
--- a/docs/references.md
+++ b/docs/references.md
@@ -66,7 +66,9 @@
 - [python-sounddevice](https://github.com/spatialaudio/python-sounddevice)
 - [python-soundfile](https://github.com/bastibe/python-soundfile)
 
-### Realtime API
+### OpenAI
+
+#### Realtime API
 
 - [August 2025 / Realtime API audio model GA](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/whats-new#realtime-api-audio-model-ga)
 - [Global Standard model availability](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#global-standard-model-availability)
@@ -75,3 +77,10 @@
 - [GPT-4o Realtime API for speech and audio](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/realtime-audio-quickstart?tabs=keyless%2Clinux&pivots=programming-language-python)
 - [OpenAI Python API library > examples/realtime](https://github.com/openai/openai-python/tree/main/examples/realtime)
 - [How to use the GPT-4o Realtime API via WebRTC](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/realtime-audio-webrtc)
+
+#### Responses API
+
+- [OpenAI / New tools for building agents](https://openai.com/index/new-tools-for-building-agents/)
+- [OpenAI / Responses](https://platform.openai.com/docs/api-reference/responses)
+- [Azure OpenAI Responses API](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/responses?tabs=python-key)
+- [LangChain / Responses API](https://python.langchain.com/docs/integrations/chat/openai/#responses-api)
diff --git a/scripts/azure_openai_operator.py b/scripts/azure_openai_operator.py
index 7f3f54a..4e03808 100644
--- a/scripts/azure_openai_operator.py
+++ b/scripts/azure_openai_operator.py
@@ -212,6 +212,53 @@ def image(
     logger.info(f"Output: {response.content}")
 
 
+@app.command()
+def responses(
+    query: str = typer.Option(
+        "What is the weather like today?",
+        "--query",
+        "-q",
+        help="Query to run with the Azure OpenAI chat model",
+    ),
+    stream: bool = typer.Option(
+        False,
+        "--stream",
+        "-s",
+        help="Enable streaming output",
+    ),
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        "-v",
+        help="Enable verbose output",
+    ),
+):
+    set_verbose_logging(verbose)
+
+    logger.info("Running...")
+    llm = AzureOpenAiWrapper().responses_model
+
+    if stream:
+        for chunk in llm.stream(
+            input=[
+                HumanMessage(content=query),
+            ],
+        ):
+            # FIXME: currently just dumps the whole chunk
+            print(chunk)
+    else:
+        response = llm.invoke(
+            input=query,
+        )
+        logger.debug(
+            response.model_dump_json(
+                indent=2,
+                exclude_none=True,
+            )
+        )
+        logger.info(f"Output: {response.content}")
+
+
 if __name__ == "__main__":
     load_dotenv(
         override=True,
diff --git a/template_langgraph/llms/azure_openais.py b/template_langgraph/llms/azure_openais.py
index e47f220..dcf9a06 100644
--- a/template_langgraph/llms/azure_openais.py
+++ b/template_langgraph/llms/azure_openais.py
@@ -45,6 +45,7 @@ def __init__(self, settings: Settings = None):
         self._chat_model: AzureChatOpenAI | None = None
         self._reasoning_model: AzureChatOpenAI | None = None
         self._embedding_model: AzureOpenAIEmbeddings | None = None
+        self._responses_model: AzureChatOpenAI | None = None
 
     def _get_auth_key(self) -> str:
         """Generate a key for authentication caching based on settings."""
@@ -138,6 +139,34 @@ def embedding_model(self) -> AzureOpenAIEmbeddings:
         )
         return self._embedding_model
 
+    @property
+    def responses_model(self) -> AzureChatOpenAI:
+        """Lazily initialize and return the Responses API model."""
+        if self._responses_model is None:
+            if self.settings.azure_openai_use_microsoft_entra_id.lower() == "true":
+                token = self._get_auth_token()
+                self._responses_model = AzureChatOpenAI(
+                    azure_ad_token=token,
+                    azure_endpoint=self.settings.azure_openai_endpoint,
+                    api_version=self.settings.azure_openai_api_version,
+                    azure_deployment=self.settings.azure_openai_model_chat,
+                    streaming=True,
+                    model=self.settings.azure_openai_model_chat,
+                    output_version="responses/v1",
+                )
+            else:
+                logger.info("Using API key for authentication")
+                self._responses_model = AzureChatOpenAI(
+                    api_key=self.settings.azure_openai_api_key,
+                    azure_endpoint=self.settings.azure_openai_endpoint,
+                    api_version=self.settings.azure_openai_api_version,
+                    azure_deployment=self.settings.azure_openai_model_chat,
+                    streaming=True,
+                    model=self.settings.azure_openai_model_chat,
+                    output_version="responses/v1",
+                )
+        return self._responses_model
+
     def create_embedding(self, text: str):
         """Create an embedding for the given text."""
         return self.embedding_model.embed_query(text)
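
Below is a minimal usage sketch of the new responses_model property (not applied by this patch), mirroring the two paths of the `responses` Typer command added in scripts/azure_openai_operator.py. It assumes the repository's usual configuration is already in place (a .env providing the Azure OpenAI endpoint, API version, deployment name, and either an API key or the Entra ID flag); the prompt strings are placeholders.

    from langchain_core.messages import HumanMessage

    from template_langgraph.llms.azure_openais import AzureOpenAiWrapper

    # Lazily build the Responses API model (AzureChatOpenAI configured with
    # output_version="responses/v1"), exactly as the new CLI command does.
    llm = AzureOpenAiWrapper().responses_model

    # Non-streaming path: invoke() returns an AIMessage whose .content holds the output,
    # which is what the CLI command logs.
    response = llm.invoke(input="What is the weather like today?")
    print(response.content)

    # Streaming path: stream() yields message chunks as they arrive. As the patch's
    # FIXME notes, the chunk shape is not normalized, so each chunk is dumped as-is.
    for chunk in llm.stream(input=[HumanMessage(content="What is the weather like today?")]):
        print(chunk)

From the command line, the same paths should presumably be reachable with
`python scripts/azure_openai_operator.py responses --query "..."` (optionally with `--stream`),
given the Typer app wiring already present in that script.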