Skip to content
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
f59979d
WIP
mattgotteiner Mar 22, 2025
9ba2353
WIP
mattgotteiner Mar 23, 2025
a2d6e31
ruff, black
mattgotteiner Mar 23, 2025
ab27f5e
adding usage
mattgotteiner Mar 23, 2025
80bcfb4
mypy
mattgotteiner Mar 23, 2025
79b682b
ruff, black
mattgotteiner Mar 23, 2025
28d5bfd
mypy, ruff, black, and update generate thought steps
mattgotteiner Mar 23, 2025
4612ccc
fix comments, set answer thought tag on streaming approaches
mattgotteiner Mar 23, 2025
aae8966
fixing frontend
mattgotteiner Mar 23, 2025
84d3e94
fixing backend + frontend
mattgotteiner Mar 24, 2025
cef2cea
token graph fixup
mattgotteiner Mar 24, 2025
403e294
fix token usage for non-streaming response
mattgotteiner Mar 24, 2025
a22d993
re-style token graph
mattgotteiner Mar 24, 2025
1136185
updates
mattgotteiner Mar 24, 2025
e429ac6
Merge branch 'Azure-Samples:main' into matt/reasoning
mattgotteiner Mar 25, 2025
8420090
addressing feedback
mattgotteiner Mar 27, 2025
2d00599
merging
mattgotteiner Mar 27, 2025
e5e462d
ruff, black
mattgotteiner Mar 27, 2025
c332053
prettify
mattgotteiner Mar 27, 2025
e68f4c3
fixing typing errors
mattgotteiner Mar 27, 2025
848abc4
ruff, black
mattgotteiner Mar 27, 2025
1354281
mypy, ruff
mattgotteiner Mar 28, 2025
8c1a88f
prettier
mattgotteiner Mar 28, 2025
0854f29
trying to fix test failures
mattgotteiner Mar 31, 2025
88a8785
fixing test failures
mattgotteiner Mar 31, 2025
9998c3c
fix streaming allowed setup
mattgotteiner Mar 31, 2025
8f40ada
black
mattgotteiner Mar 31, 2025
cc17995
try to fix e2e test
mattgotteiner Mar 31, 2025
8347150
add tests; updating env vars
mattgotteiner Mar 31, 2025
39bf02c
adding tests
mattgotteiner Apr 1, 2025
d131248
rerecording
mattgotteiner Apr 1, 2025
1f8e166
more recording
mattgotteiner Apr 1, 2025
ca0ae46
add tests; ruff, black
mattgotteiner Apr 2, 2025
41a3d2f
run prettier
mattgotteiner Apr 2, 2025
8c98686
update docs
mattgotteiner Apr 2, 2025
21e9958
fix test
mattgotteiner Apr 2, 2025
8eef1a3
fix linter
mattgotteiner Apr 2, 2025
04391b7
Fleshed out readme
pamelafox Apr 2, 2025
fff9c59
Adding reasoning to deploy features
pamelafox Apr 2, 2025
cdee1d5
Make changes from Pamela's feedback
pamelafox Apr 2, 2025
5d081cc
undangle that comma
pamelafox Apr 2, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .azdo/pipelines/azure-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ steps:
AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY: $(AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY)
AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION: $(AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION)
AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU: $(AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU)
AZURE_OPENAI_REASONING_EFFORT: $(AZURE_OPENAI_REASONING_EFFORT)
AZURE_OPENAI_EMB_MODEL_NAME: $(AZURE_OPENAI_EMB_MODEL_NAME)
AZURE_OPENAI_EMB_DEPLOYMENT: $(AZURE_OPENAI_EMB_DEPLOYMENT)
AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY: $(AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY)
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/azure-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ jobs:
AZURE_OPENAI_CHATGPT_DEPLOYMENT: ${{ vars.AZURE_OPENAI_CHATGPT_DEPLOYMENT }}
AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY }}
AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION: ${{ vars.AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION }}
AZURE_OPENAI_REASONING_EFFORT: ${{ vars.AZURE_OPENAI_REASONING_EFFORT }}
AZURE_OPENAI_EMB_MODEL_NAME: ${{ vars.AZURE_OPENAI_EMB_MODEL_NAME }}
AZURE_OPENAI_EMB_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT }}
AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY }}
Expand Down
29 changes: 29 additions & 0 deletions app/backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,13 @@
CONFIG_CHAT_HISTORY_COSMOS_ENABLED,
CONFIG_CHAT_VISION_APPROACH,
CONFIG_CREDENTIAL,
CONFIG_DEFAULT_REASONING_EFFORT,
CONFIG_GPT4V_DEPLOYED,
CONFIG_INGESTER,
CONFIG_LANGUAGE_PICKER_ENABLED,
CONFIG_OPENAI_CLIENT,
CONFIG_QUERY_REWRITING_ENABLED,
CONFIG_REASONING_EFFORT_ENABLED,
CONFIG_SEARCH_CLIENT,
CONFIG_SEMANTIC_RANKER_DEPLOYED,
CONFIG_SPEECH_INPUT_ENABLED,
Expand All @@ -79,6 +81,7 @@
CONFIG_SPEECH_SERVICE_LOCATION,
CONFIG_SPEECH_SERVICE_TOKEN,
CONFIG_SPEECH_SERVICE_VOICE,
CONFIG_STREAMING_ENABLED,
CONFIG_USER_BLOB_CONTAINER_CLIENT,
CONFIG_USER_UPLOAD_ENABLED,
CONFIG_VECTOR_SEARCH_ENABLED,
Expand Down Expand Up @@ -293,6 +296,9 @@ def config():
"showGPT4VOptions": current_app.config[CONFIG_GPT4V_DEPLOYED],
"showSemanticRankerOption": current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED],
"showQueryRewritingOption": current_app.config[CONFIG_QUERY_REWRITING_ENABLED],
"showReasoningEffortOption": current_app.config[CONFIG_REASONING_EFFORT_ENABLED],
"streamingEnabled": current_app.config[CONFIG_STREAMING_ENABLED],
"defaultReasoningEffort": current_app.config[CONFIG_DEFAULT_REASONING_EFFORT],
"showVectorOption": current_app.config[CONFIG_VECTOR_SEARCH_ENABLED],
"showUserUpload": current_app.config[CONFIG_USER_UPLOAD_ENABLED],
"showLanguagePicker": current_app.config[CONFIG_LANGUAGE_PICKER_ENABLED],
Expand Down Expand Up @@ -423,6 +429,7 @@ async def setup_clients():
OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"]
OPENAI_EMB_MODEL = os.getenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-ada-002")
OPENAI_EMB_DIMENSIONS = int(os.getenv("AZURE_OPENAI_EMB_DIMENSIONS") or 1536)
OPENAI_REASONING_EFFORT = os.getenv("AZURE_OPENAI_REASONING_EFFORT")
# Used with Azure OpenAI deployments
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
AZURE_OPENAI_GPT4V_DEPLOYMENT = os.environ.get("AZURE_OPENAI_GPT4V_DEPLOYMENT")
Expand Down Expand Up @@ -640,6 +647,13 @@ async def setup_clients():
current_app.config[CONFIG_QUERY_REWRITING_ENABLED] = (
AZURE_SEARCH_QUERY_REWRITING == "true" and AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
)
current_app.config[CONFIG_DEFAULT_REASONING_EFFORT] = OPENAI_REASONING_EFFORT
current_app.config[CONFIG_REASONING_EFFORT_ENABLED] = OPENAI_CHATGPT_MODEL in Approach.GPT_REASONING_MODELS
current_app.config[CONFIG_STREAMING_ENABLED] = (
bool(USE_GPT4V)
or OPENAI_CHATGPT_MODEL not in Approach.GPT_REASONING_MODELS
or Approach.GPT_REASONING_MODELS[OPENAI_CHATGPT_MODEL].streaming
)
current_app.config[CONFIG_VECTOR_SEARCH_ENABLED] = os.getenv("USE_VECTORS", "").lower() != "false"
current_app.config[CONFIG_USER_UPLOAD_ENABLED] = bool(USE_USER_UPLOAD)
current_app.config[CONFIG_LANGUAGE_PICKER_ENABLED] = ENABLE_LANGUAGE_PICKER
Expand Down Expand Up @@ -667,6 +681,7 @@ async def setup_clients():
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
reasoning_effort=OPENAI_REASONING_EFFORT,
)

# ChatReadRetrieveReadApproach is used by /chat for multi-turn conversation
Expand All @@ -684,12 +699,26 @@ async def setup_clients():
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
reasoning_effort=OPENAI_REASONING_EFFORT,
)

if USE_GPT4V:
current_app.logger.info("USE_GPT4V is true, setting up GPT4V approach")
if not AZURE_OPENAI_GPT4V_MODEL:
raise ValueError("AZURE_OPENAI_GPT4V_MODEL must be set when USE_GPT4V is true")
if any(
model in Approach.GPT_REASONING_MODELS
for model in [
OPENAI_CHATGPT_MODEL,
AZURE_OPENAI_GPT4V_MODEL,
AZURE_OPENAI_CHATGPT_DEPLOYMENT,
AZURE_OPENAI_GPT4V_DEPLOYMENT,
]
):
raise ValueError(
"AZURE_OPENAI_CHATGPT_MODEL and AZURE_OPENAI_GPT4V_MODEL must not be a reasoning model when USE_GPT4V is true"
)

token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")

current_app.config[CONFIG_ASK_VISION_APPROACH] = RetrieveThenReadVisionApproach(
Expand Down
166 changes: 164 additions & 2 deletions app/backend/approaches/approach.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import os
from abc import ABC
from dataclasses import dataclass
Expand All @@ -6,9 +7,11 @@
AsyncGenerator,
Awaitable,
Callable,
Dict,
List,
Optional,
TypedDict,
Union,
cast,
)
from urllib.parse import urljoin
Expand All @@ -21,8 +24,16 @@
VectorizedQuery,
VectorQuery,
)
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam
from openai import AsyncOpenAI, AsyncStream
from openai.types import CompletionUsage
from openai.types.chat import (
ChatCompletion,
ChatCompletionChunk,
ChatCompletionDeveloperMessageParam,
ChatCompletionMessageParam,
ChatCompletionReasoningEffort,
ChatCompletionToolParam,
)

from approaches.promptmanager import PromptManager
from core.authentication import AuthenticationHelper
Expand Down Expand Up @@ -89,8 +100,68 @@ class ThoughtStep:
description: Optional[Any]
props: Optional[dict[str, Any]] = None

def update_token_usage(self, usage: CompletionUsage) -> None:
    """Record token accounting on this step's props; no-op when props is unset."""
    if not self.props:
        return
    self.props["token_usage"] = TokenUsageProps.from_completion_usage(usage)


@dataclass
class DataPoints:
    """Container for the retrieved source material backing an answer."""

    # Text data points; None when none were supplied.
    text: Optional[List[str]] = None
    # Image data points; None when none were supplied.
    images: Optional[List] = None


@dataclass
class ExtraInfo:
    """Supplementary response payload: data points, thought steps, and
    optional follow-up questions that accompany an answer."""

    # The retrieved sources used to ground the response.
    data_points: DataPoints
    # Ordered reasoning/processing steps taken to produce the response.
    thoughts: Optional[List[ThoughtStep]] = None
    # Suggested follow-up questions, when the approach generates them.
    followup_questions: Optional[List[Any]] = None

@dataclass
class TokenUsageProps:
    """Token counts for a single chat-completion call, in a shape suitable
    for surfacing in thought-step props."""

    prompt_tokens: int
    completion_tokens: int
    # Reasoning tokens are only reported for reasoning-capable models;
    # None when the usage payload has no completion_tokens_details.
    reasoning_tokens: Optional[int]
    total_tokens: int

    @classmethod
    def from_completion_usage(cls, usage: CompletionUsage) -> "TokenUsageProps":
        """Convert an OpenAI CompletionUsage object into a TokenUsageProps."""
        details = usage.completion_tokens_details
        reasoning = details.reasoning_tokens if details else None
        return cls(
            prompt_tokens=usage.prompt_tokens,
            completion_tokens=usage.completion_tokens,
            reasoning_tokens=reasoning,
            total_tokens=usage.total_tokens,
        )


# GPT reasoning models don't support the same set of parameters as other models
# https://learn.microsoft.com/azure/ai-services/openai/how-to/reasoning
@dataclass
class GPTReasoningModelSupport:
    """Feature flags describing which chat-completion parameters a given
    GPT reasoning model accepts."""

    # Model accepts the `reasoning_effort` parameter.
    reasoning_effort: bool
    # Model supports tool/function calling.
    tools: bool
    # Model supports system messages (otherwise they must become developer messages).
    system_messages: bool
    # Model supports streamed responses.
    streaming: bool


class Approach(ABC):
    """Base class for the RAG approaches; holds shared model-capability
    metadata and helpers for building chat-completion requests."""

    # Capability matrix for GPT reasoning models: which chat-completion
    # features (reasoning_effort, tools, system messages, streaming) each
    # model supports. Models not listed here are treated as non-reasoning.
    GPT_REASONING_MODELS = {
        "o1": GPTReasoningModelSupport(reasoning_effort=True, tools=True, system_messages=True, streaming=False),
        "o1-preview": GPTReasoningModelSupport(
            reasoning_effort=False, tools=False, system_messages=False, streaming=False
        ),
        "o1-mini": GPTReasoningModelSupport(
            reasoning_effort=False, tools=False, system_messages=False, streaming=False
        ),
        "o3-mini": GPTReasoningModelSupport(reasoning_effort=True, tools=True, system_messages=True, streaming=True),
    }
    # Default completion-token caps; reasoning models get a higher limit
    # (see get_response_token_limit).
    RESPONSE_DEFAULT_TOKEN_LIMIT = 1024
    RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT = 8192

def __init__(
self,
Expand All @@ -106,6 +177,8 @@ def __init__(
vision_endpoint: str,
vision_token_provider: Callable[[], Awaitable[str]],
prompt_manager: PromptManager,
reasoning_effort: Optional[str] = None,
include_token_usage: Optional[bool] = None,
):
self.search_client = search_client
self.openai_client = openai_client
Expand All @@ -119,6 +192,8 @@ def __init__(
self.vision_endpoint = vision_endpoint
self.vision_token_provider = vision_token_provider
self.prompt_manager = prompt_manager
self.reasoning_effort = reasoning_effort
self.include_token_usage = include_token_usage

def build_filter(self, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> Optional[str]:
include_category = overrides.get("include_category")
Expand Down Expand Up @@ -281,6 +356,93 @@ def get_system_prompt_variables(self, override_prompt: Optional[str]) -> dict[st
else:
return {"override_prompt": override_prompt}

def get_response_token_limit(self, model: str) -> int:
    """Return the default completion-token cap for *model*: reasoning
    models get the larger reasoning limit, all others the standard one."""
    is_reasoning_model = model in self.GPT_REASONING_MODELS
    return (
        self.RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT
        if is_reasoning_model
        else self.RESPONSE_DEFAULT_TOKEN_LIMIT
    )

def create_chat_completion(
    self,
    chatgpt_deployment: Optional[str],
    chatgpt_model: str,
    messages: list[ChatCompletionMessageParam],
    overrides: dict[str, Any],
    should_stream: bool = False,
    response_token_limit: Optional[int] = None,
    tools: Optional[List[ChatCompletionToolParam]] = None,
    temperature: Optional[float] = None,
    n: Optional[int] = None,
    reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
) -> Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]:
    """Create (but do not await) a chat-completion request, adapting
    parameters to what the target model supports.

    For models in GPT_REASONING_MODELS this sends max_completion_tokens and
    only enables streaming/tools/reasoning_effort when the model supports
    them; for models without system-message support, the first message's
    role is rewritten to "developer". Other models receive max_tokens,
    temperature, and tools directly.

    Returns the awaitable from openai_client.chat.completions.create, which
    resolves to a ChatCompletion or, when streaming, an AsyncStream of chunks.
    """
    response_token_limit = response_token_limit or self.get_response_token_limit(chatgpt_model)
    if chatgpt_model in self.GPT_REASONING_MODELS:
        params: Dict[str, Any] = {
            # max_tokens is not supported for reasoning models
            "max_completion_tokens": response_token_limit,
        }

        # Adjust parameters for reasoning models
        supported_features = self.GPT_REASONING_MODELS[chatgpt_model]
        if supported_features.streaming and should_stream:
            params["stream"] = True
            params["stream_options"] = {"include_usage": True}
        if supported_features.tools:
            params["tools"] = tools
        if supported_features.reasoning_effort:
            # Fallback chain: explicit argument -> per-request override -> configured default
            params["reasoning_effort"] = (
                reasoning_effort or overrides.get("reasoning_effort") or self.reasoning_effort
            )

        # For reasoning models that don't support system messages - migrate to developer messages
        # https://learn.microsoft.com/azure/ai-services/openai/how-to/reasoning?tabs=python-secure#developer-messages
        if not supported_features.system_messages:
            # Deep-copy so the caller's message list is not mutated
            messages = copy.deepcopy(messages)
            developer_message = cast(ChatCompletionDeveloperMessageParam, messages[0])
            developer_message["role"] = "developer"

    else:
        # Include parameters that may not be supported for reasoning models
        params = {
            "max_tokens": response_token_limit,
            # Compare against None (not truthiness) so an explicit
            # temperature of 0.0 is honored instead of silently falling
            # back to the override/default.
            "temperature": temperature if temperature is not None else overrides.get("temperature", 0.3),
            # NOTE(review): tools may be None here and is passed through as an
            # explicit null — verify the SDK/API tolerates that.
            "tools": tools,
        }
        if should_stream:
            params["stream"] = True
            params["stream_options"] = {"include_usage": True}

    # Azure OpenAI takes the deployment name as the model name
    return self.openai_client.chat.completions.create(
        model=chatgpt_deployment if chatgpt_deployment else chatgpt_model,
        messages=messages,
        seed=overrides.get("seed", None),
        n=n or 1,
        **params,
    )

def create_generate_thought_step(
    self,
    title: str,
    messages: List[ChatCompletionMessageParam],
    overrides: dict[str, Any],
    model: str,
    deployment: Optional[str],
    usage: Optional[CompletionUsage] = None,
    reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
) -> ThoughtStep:
    """Build a ThoughtStep describing one chat-completion call, recording
    the model/deployment used, the effective reasoning effort (only for
    models that support it), and token usage when available."""
    properties: Dict[str, Any] = {"model": model}
    if deployment:
        properties["deployment"] = deployment
    # Only add reasoning_effort setting if the model supports it
    if (supported_features := self.GPT_REASONING_MODELS.get(model)) and supported_features.reasoning_effort:
        # Mirror create_chat_completion's fallback chain
        # (argument -> override -> configured default) so the recorded
        # effort matches what was actually sent to the API.
        properties["reasoning_effort"] = (
            reasoning_effort or overrides.get("reasoning_effort") or self.reasoning_effort
        )
    if usage:
        properties["token_usage"] = TokenUsageProps.from_completion_usage(usage)
    return ThoughtStep(title, messages, properties)

async def run(
self,
messages: list[ChatCompletionMessageParam],
Expand Down
Loading
Loading