6
6
AsyncGenerator ,
7
7
Awaitable ,
8
8
Callable ,
9
+ Dict ,
9
10
List ,
10
11
Optional ,
11
12
TypedDict ,
13
+ Union ,
12
14
cast ,
13
15
)
14
16
from urllib .parse import urljoin
21
23
VectorizedQuery ,
22
24
VectorQuery ,
23
25
)
24
- from openai import AsyncOpenAI
25
- from openai .types .chat import ChatCompletionMessageParam
26
+ from openai import AsyncOpenAI , AsyncStream
27
+ from openai .types import CompletionUsage
28
+ from openai .types .chat import (
29
+ ChatCompletion ,
30
+ ChatCompletionChunk ,
31
+ ChatCompletionMessageParam ,
32
+ ChatCompletionReasoningEffort ,
33
+ ChatCompletionToolParam ,
34
+ )
26
35
27
36
from approaches .promptmanager import PromptManager
28
37
from core .authentication import AuthenticationHelper
@@ -91,8 +100,59 @@ class ThoughtStep:
91
100
description : Optional [Any ]
92
101
props : Optional [dict [str , Any ]] = None
93
102
103
def update_token_usage(self, usage: CompletionUsage) -> None:
    """Record token-usage details on this thought step's props.

    No-op when props is unset/empty, since there is nowhere to attach
    the usage information.
    """
    if not self.props:
        return
    self.props["token_usage"] = TokenUsageProps.from_completion_usage(usage)
106
+
107
+
108
@dataclass
class DataPoints:
    """Grounding content retrieved from search for a chat/ask response.

    text: citation-style text snippets used as context for the model.
    images: image references for vision-enabled approaches; the original
        annotation was a bare ``List`` — element type is not constrained
        anywhere in this code, so ``Any`` is used.
    """

    text: Optional[List[str]] = None
    images: Optional[List[Any]] = None
112
+
113
+
114
@dataclass
class ExtraInfo:
    """Auxiliary data returned alongside a chat/ask answer.

    Bundles the retrieved grounding content, the step-by-step thought
    process, and any suggested follow-up questions.
    """

    # Retrieved grounding content (text and/or images) for the answer.
    data_points: DataPoints
    # Ordered reasoning/processing steps taken to produce the answer.
    thoughts: Optional[List[ThoughtStep]] = None
    # Suggested follow-up questions, when the approach generates them.
    followup_questions: Optional[List[Any]] = None
119
+
120
+
121
@dataclass
class TokenUsageProps:
    """Token accounting for a single chat-completion call."""

    prompt_tokens: int
    completion_tokens: int
    # Reasoning tokens are only reported for reasoning-capable models;
    # None when the SDK provides no completion_tokens_details.
    reasoning_tokens: Optional[int]
    total_tokens: int

    @classmethod
    def from_completion_usage(cls, usage: CompletionUsage) -> "TokenUsageProps":
        """Build a TokenUsageProps from the OpenAI SDK's usage object."""
        details = usage.completion_tokens_details
        reasoning = details.reasoning_tokens if details else None
        return cls(
            prompt_tokens=usage.prompt_tokens,
            completion_tokens=usage.completion_tokens,
            reasoning_tokens=reasoning,
            total_tokens=usage.total_tokens,
        )
138
+
139
+
140
# GPT reasoning models don't support the same set of parameters as other models
# https://learn.microsoft.com/azure/ai-services/openai/how-to/reasoning
@dataclass
class GPTReasoningModelSupport:
    """Feature flags for a GPT reasoning model."""

    # Whether the model supports streamed (chunked) chat completions.
    streaming: bool
145
+
94
146
95
147
class Approach (ABC ):
148
+ # List of GPT reasoning models support
149
+ GPT_REASONING_MODELS = {
150
+ "o1" : GPTReasoningModelSupport (streaming = False ),
151
+ "o3-mini" : GPTReasoningModelSupport (streaming = True ),
152
+ }
153
+ # Set a higher token limit for GPT reasoning models
154
+ RESPONSE_DEFAULT_TOKEN_LIMIT = 1024
155
+ RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT = 8192
96
156
97
157
def __init__ (
98
158
self ,
@@ -109,6 +169,7 @@ def __init__(
109
169
vision_endpoint : str ,
110
170
vision_token_provider : Callable [[], Awaitable [str ]],
111
171
prompt_manager : PromptManager ,
172
+ reasoning_effort : Optional [str ] = None ,
112
173
):
113
174
self .search_client = search_client
114
175
self .openai_client = openai_client
@@ -123,6 +184,8 @@ def __init__(
123
184
self .vision_endpoint = vision_endpoint
124
185
self .vision_token_provider = vision_token_provider
125
186
self .prompt_manager = prompt_manager
187
+ self .reasoning_effort = reasoning_effort
188
+ self .include_token_usage = True
126
189
127
190
def build_filter (self , overrides : dict [str , Any ], auth_claims : dict [str , Any ]) -> Optional [str ]:
128
191
include_category = overrides .get ("include_category" )
@@ -286,6 +349,81 @@ def get_system_prompt_variables(self, override_prompt: Optional[str]) -> dict[st
286
349
else :
287
350
return {"override_prompt" : override_prompt }
288
351
352
def get_response_token_limit(self, model: str, default_limit: int) -> int:
    """Return the response token limit to request for *model*.

    Reasoning models get the larger reasoning default; every other model
    uses the caller-supplied default.
    """
    is_reasoning_model = model in self.GPT_REASONING_MODELS
    return self.RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT if is_reasoning_model else default_limit
357
+
358
def create_chat_completion(
    self,
    chatgpt_deployment: Optional[str],
    chatgpt_model: str,
    messages: list[ChatCompletionMessageParam],
    overrides: dict[str, Any],
    response_token_limit: int,
    should_stream: bool = False,
    tools: Optional[List[ChatCompletionToolParam]] = None,
    temperature: Optional[float] = None,
    n: Optional[int] = None,
    reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
) -> Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]:
    """Build and dispatch a chat-completion request, adapting parameters per model family.

    GPT reasoning models (see GPT_REASONING_MODELS) reject ``max_tokens`` and
    ``temperature``; they take ``max_completion_tokens`` and ``reasoning_effort``
    instead, and only some of them support streaming. Other models get the
    standard parameter set.

    :param chatgpt_deployment: Azure deployment name; falls back to the model name.
    :param should_stream: request a streamed response when the model supports it.
    :param temperature: explicit temperature; overrides the "temperature" override
        (default 0.3). An explicit 0.0 is honored.
    :return: the awaitable returned by the OpenAI SDK (completion or stream).
    """
    if chatgpt_model in self.GPT_REASONING_MODELS:
        params: Dict[str, Any] = {
            # max_tokens is not supported for reasoning models
            "max_completion_tokens": response_token_limit
        }

        # Adjust parameters for reasoning models; streaming is only enabled
        # when this specific model supports it.
        supported_features = self.GPT_REASONING_MODELS[chatgpt_model]
        if supported_features.streaming and should_stream:
            params["stream"] = True
            params["stream_options"] = {"include_usage": True}
        params["reasoning_effort"] = reasoning_effort or overrides.get("reasoning_effort") or self.reasoning_effort

    else:
        # Include parameters that may not be supported for reasoning models.
        # Use `is not None` (not truthiness) so an explicit temperature of 0.0
        # is respected instead of silently falling back to the override/default.
        params = {
            "max_tokens": response_token_limit,
            "temperature": temperature if temperature is not None else overrides.get("temperature", 0.3),
        }
        if should_stream:
            params["stream"] = True
            params["stream_options"] = {"include_usage": True}

    params["tools"] = tools

    # Azure OpenAI takes the deployment name as the model name
    return self.openai_client.chat.completions.create(
        model=chatgpt_deployment if chatgpt_deployment else chatgpt_model,
        messages=messages,
        seed=overrides.get("seed", None),
        n=n if n is not None else 1,
        **params,
    )
404
+
405
def format_thought_step_for_chatcompletion(
    self,
    title: str,
    messages: List[ChatCompletionMessageParam],
    overrides: dict[str, Any],
    model: str,
    deployment: Optional[str],
    usage: Optional[CompletionUsage] = None,
    reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
) -> ThoughtStep:
    """Build a ThoughtStep describing a chat-completion call.

    Records the model (and deployment, when set), the effective
    reasoning_effort for reasoning models, and token usage when available.
    """
    properties: Dict[str, Any] = {"model": model}
    if deployment:
        properties["deployment"] = deployment
    # Only add reasoning_effort setting if the model supports it.
    if model in self.GPT_REASONING_MODELS:
        # Mirror the fallback chain used in create_chat_completion
        # (arg -> override -> instance default) so the recorded value
        # matches what was actually sent to the API, even when the
        # override is explicitly None/empty.
        properties["reasoning_effort"] = (
            reasoning_effort or overrides.get("reasoning_effort") or self.reasoning_effort
        )
    if usage:
        properties["token_usage"] = TokenUsageProps.from_completion_usage(usage)
    return ThoughtStep(title, messages, properties)
426
+
289
427
async def run (
290
428
self ,
291
429
messages : list [ChatCompletionMessageParam ],
0 commit comments