@@ -6,9 +6,11 @@
     AsyncGenerator,
     Awaitable,
     Callable,
+    Dict,
     List,
     Optional,
     TypedDict,
+    Union,
     cast,
 )
 from urllib.parse import urljoin
@@ -21,8 +23,15 @@
     VectorizedQuery,
     VectorQuery,
 )
-from openai import AsyncOpenAI
-from openai.types.chat import ChatCompletionMessageParam
+from openai import AsyncOpenAI, AsyncStream
+from openai.types import CompletionUsage
+from openai.types.chat import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    ChatCompletionMessageParam,
+    ChatCompletionReasoningEffort,
+    ChatCompletionToolParam,
+)
 
 from approaches.promptmanager import PromptManager
 from core.authentication import AuthenticationHelper
@@ -89,8 +98,59 @@ class ThoughtStep:
     description: Optional[Any]
     props: Optional[dict[str, Any]] = None
 
+    def update_token_usage(self, usage: CompletionUsage) -> None:
+        if self.props:
+            self.props["token_usage"] = TokenUsageProps.from_completion_usage(usage)
+
+
+@dataclass
+class DataPoints:
+    text: Optional[List[str]] = None
+    images: Optional[List] = None
+
+
+@dataclass
+class ExtraInfo:
+    data_points: DataPoints
+    thoughts: Optional[List[ThoughtStep]] = None
+    followup_questions: Optional[List[Any]] = None
+
+
+@dataclass
+class TokenUsageProps:
+    prompt_tokens: int
+    completion_tokens: int
+    reasoning_tokens: Optional[int]
+    total_tokens: int
+
+    @classmethod
+    def from_completion_usage(cls, usage: CompletionUsage) -> "TokenUsageProps":
+        return cls(
+            prompt_tokens=usage.prompt_tokens,
+            completion_tokens=usage.completion_tokens,
+            reasoning_tokens=(
+                usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else None
+            ),
+            total_tokens=usage.total_tokens,
+        )
+
+
+# GPT reasoning models don't support the same set of parameters as other models
+# https://learn.microsoft.com/azure/ai-services/openai/how-to/reasoning
+@dataclass
+class GPTReasoningModelSupport:
+    streaming: bool
+
 
 class Approach(ABC):
+    # GPT reasoning models and the features each of them supports
+    GPT_REASONING_MODELS = {
+        "o1": GPTReasoningModelSupport(streaming=False),
+        "o3-mini": GPTReasoningModelSupport(streaming=True),
+    }
+    # Set a higher default token limit for GPT reasoning models
+    RESPONSE_DEFAULT_TOKEN_LIMIT = 1024
+    RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT = 8192
 
     def __init__(
         self,
@@ -106,6 +166,7 @@ def __init__(
         vision_endpoint: str,
         vision_token_provider: Callable[[], Awaitable[str]],
         prompt_manager: PromptManager,
+        reasoning_effort: Optional[str] = None,
     ):
         self.search_client = search_client
         self.openai_client = openai_client
@@ -119,6 +180,8 @@ def __init__(
         self.vision_endpoint = vision_endpoint
         self.vision_token_provider = vision_token_provider
         self.prompt_manager = prompt_manager
+        self.reasoning_effort = reasoning_effort
+        self.include_token_usage = True
 
     def build_filter(self, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> Optional[str]:
         include_category = overrides.get("include_category")
@@ -281,6 +344,81 @@ def get_system_prompt_variables(self, override_prompt: Optional[str]) -> dict[st
         else:
             return {"override_prompt": override_prompt}
 
+    def get_response_token_limit(self, model: str, default_limit: int) -> int:
+        if model in self.GPT_REASONING_MODELS:
+            return self.RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT
+
+        return default_limit
+
+    def create_chat_completion(
+        self,
+        chatgpt_deployment: Optional[str],
+        chatgpt_model: str,
+        messages: list[ChatCompletionMessageParam],
+        overrides: dict[str, Any],
+        response_token_limit: int,
+        should_stream: bool = False,
+        tools: Optional[List[ChatCompletionToolParam]] = None,
+        temperature: Optional[float] = None,
+        n: Optional[int] = None,
+        reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
+    ) -> Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]:
+        if chatgpt_model in self.GPT_REASONING_MODELS:
+            params: Dict[str, Any] = {
+                # max_tokens is not supported for reasoning models
+                "max_completion_tokens": response_token_limit
+            }
+
+            # Adjust parameters for reasoning models
+            supported_features = self.GPT_REASONING_MODELS[chatgpt_model]
+            if supported_features.streaming and should_stream:
+                params["stream"] = True
+                params["stream_options"] = {"include_usage": True}
+            params["reasoning_effort"] = reasoning_effort or overrides.get("reasoning_effort") or self.reasoning_effort
+
+        else:
+            # Include parameters that reasoning models may not support
+            params = {
+                "max_tokens": response_token_limit,
+                "temperature": temperature or overrides.get("temperature", 0.3),
+            }
+            if should_stream:
+                params["stream"] = True
+                params["stream_options"] = {"include_usage": True}
+
+        params["tools"] = tools
+
+        # Azure OpenAI takes the deployment name as the model name
+        return self.openai_client.chat.completions.create(
+            model=chatgpt_deployment if chatgpt_deployment else chatgpt_model,
+            messages=messages,
+            seed=overrides.get("seed", None),
+            n=n or 1,
+            **params,
+        )
+
+    def format_thought_step_for_chatcompletion(
+        self,
+        title: str,
+        messages: List[ChatCompletionMessageParam],
+        overrides: dict[str, Any],
+        model: str,
+        deployment: Optional[str],
+        usage: Optional[CompletionUsage] = None,
+        reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
+    ) -> ThoughtStep:
+        properties: Dict[str, Any] = {"model": model}
+        if deployment:
+            properties["deployment"] = deployment
+        # Only add the reasoning_effort setting if the model supports it
+        if model in self.GPT_REASONING_MODELS:
+            properties["reasoning_effort"] = reasoning_effort or overrides.get(
+                "reasoning_effort", self.reasoning_effort
+            )
+        if usage:
+            properties["token_usage"] = TokenUsageProps.from_completion_usage(usage)
+        return ThoughtStep(title, messages, properties)
+
 
     async def run(
         self,
         messages: list[ChatCompletionMessageParam],
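
For orientation, a minimal usage sketch (not part of the diff) showing how a concrete Approach subclass might combine the new helpers. The `approach` instance, the "chat" deployment name, and the message list are hypothetical placeholders; "o3-mini" is used because GPT_REASONING_MODELS marks it as streaming-capable.

# Hypothetical usage sketch; `approach` is an already-constructed Approach
# subclass instance and "chat" an assumed Azure OpenAI deployment name.
from openai.types.chat import ChatCompletionMessageParam


async def answer(approach, messages: list[ChatCompletionMessageParam]) -> str:
    # Reasoning models automatically receive the larger 8192-token limit.
    token_limit = approach.get_response_token_limit("o3-mini", approach.RESPONSE_DEFAULT_TOKEN_LIMIT)
    # For a reasoning model this sends max_completion_tokens and
    # reasoning_effort instead of max_tokens and temperature.
    chat_completion = await approach.create_chat_completion(
        chatgpt_deployment="chat",
        chatgpt_model="o3-mini",
        messages=messages,
        overrides={"reasoning_effort": "low"},
        response_token_limit=token_limit,
        should_stream=False,
    )
    return chat_completion.choices[0].message.content or ""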
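
And a hypothetical sketch of the token-accounting path; the CompletionUsage numbers are made up, and update_token_usage only records usage when the step was created with a props dict.

# Hypothetical sketch: attach token usage to a ThoughtStep after a completion.
from openai.types import CompletionUsage

usage = CompletionUsage(prompt_tokens=120, completion_tokens=60, total_tokens=180)
step = ThoughtStep("Generate answer", None, {"model": "o3-mini"})
step.update_token_usage(usage)  # stores TokenUsageProps under props["token_usage"]
print(step.props["token_usage"].reasoning_tokens)  # None: no completion_tokens_details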