Skip to content

Commit 4667fec

Browse files
committed
Add support for ReasoningEffort.None for latest GPT
This is the default mode for gpt-5.2 as documented at https://platform.openai.com/docs/guides/latest-model#lower-reasoning-effort, but can also be set for other models.
1 parent 4ce4cc0 commit 4667fec

File tree

5 files changed

+66
-7
lines changed

5 files changed

+66
-7
lines changed

src/Extensions/OpenAI/AzureOpenAIChatClient.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ public AzureOpenAIChatClient(Uri endpoint, ApiKeyCredential credential, string m
4545

4646
/// <inheritdoc/>
4747
public Task<ChatResponse> GetResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellation = default)
48-
=> GetChatClient(options?.ModelId ?? modelId).GetResponseAsync(messages, options.SetResponseOptions(), cancellation);
48+
=> GetChatClient(options?.ModelId ?? modelId).GetResponseAsync(messages, options.ApplyExtensions(), cancellation);
4949

5050
/// <inheritdoc/>
5151
public IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellation = default)
52-
=> GetChatClient(options?.ModelId ?? modelId).GetStreamingResponseAsync(messages, options.SetResponseOptions(), cancellation);
52+
=> GetChatClient(options?.ModelId ?? modelId).GetStreamingResponseAsync(messages, options.ApplyExtensions(), cancellation);
5353

5454
IChatClient GetChatClient(string modelId) => clients.GetOrAdd(modelId, model
5555
=> new PipelineClient(pipeline, endpoint, options).GetOpenAIResponseClient(modelId).AsIChatClient());

src/Extensions/OpenAI/OpenAIChatClient.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ public OpenAIChatClient(string apiKey, string modelId, OpenAIClientOptions? opti
3737

3838
/// <inheritdoc/>
3939
public Task<ChatResponse> GetResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellation = default)
40-
=> GetChatClient(options?.ModelId ?? modelId).GetResponseAsync(messages, options.SetResponseOptions(), cancellation);
40+
=> GetChatClient(options?.ModelId ?? modelId).GetResponseAsync(messages, options.ApplyExtensions(), cancellation);
4141

4242
/// <inheritdoc/>
4343
public IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellation = default)
44-
=> GetChatClient(options?.ModelId ?? modelId).GetStreamingResponseAsync(messages, options.SetResponseOptions(), cancellation);
44+
=> GetChatClient(options?.ModelId ?? modelId).GetStreamingResponseAsync(messages, options.ApplyExtensions(), cancellation);
4545

4646
IChatClient GetChatClient(string modelId) => clients.GetOrAdd(modelId, model
4747
=> new PipelineClient(pipeline, options).GetOpenAIResponseClient(modelId).AsIChatClient());

src/Extensions/OpenAI/OpenAIExtensions.cs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,23 @@
55

66
namespace Devlooped.Extensions.AI.OpenAI;
77

8-
static class OpenAIExtensions
8+
/// <summary>
9+
/// Allows applying extension properties to the <see cref="ChatOptions"/> when using
10+
/// them with an OpenAI client.
11+
/// </summary>
12+
public static class OpenAIExtensions
913
{
10-
public static ChatOptions? SetResponseOptions(this ChatOptions? options)
14+
/// <summary>
15+
/// Applies the extension properties to the <paramref name="options"/> so that
16+
/// the underlying OpenAI client can properly forward them to the endpoint.
17+
/// </summary>
18+
/// <remarks>
19+
/// Only use this if you are not using <see cref="OpenAIChatClient"/>, which already applies
20+
/// extensions before sending requests.
21+
/// </remarks>
22+
/// <returns>The <paramref name="options"/> with its <see cref="ChatOptions.RawRepresentationFactory"/> replaced
23+
/// so it can forward extensions to the underlying OpenAI API.</returns>
24+
public static ChatOptions? ApplyExtensions(this ChatOptions? options)
1125
{
1226
if (options is null)
1327
return null;

src/Extensions/ReasoningEffort.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
/// </summary>
66
public enum ReasoningEffort
77
{
8+
/// <summary>
9+
/// Provides the lowest latency by indicating that no reasoning tokens should be spent at all. Support depends on the model.
10+
/// </summary>
11+
/// <seealso href="https://platform.openai.com/docs/guides/latest-model#lower-reasoning-effort"/>
12+
None,
813
/// <summary>
914
/// Minimal reasoning effort, which may result in faster responses. Support depends on the model.
1015
/// </summary>

src/Tests/OpenAITests.cs

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ public async Task GPT5_ThinksFast()
121121
}
122122

123123
[SecretsTheory("OPENAI_API_KEY")]
124+
[InlineData(ReasoningEffort.None)]
124125
[InlineData(ReasoningEffort.Minimal)]
125126
[InlineData(ReasoningEffort.Low)]
126127
[InlineData(ReasoningEffort.Medium)]
@@ -135,7 +136,7 @@ public async Task GPT5_ThinkingTime(ReasoningEffort effort)
135136

136137
var requests = new List<JsonNode>();
137138

138-
var chat = new OpenAIChatClient(Configuration["OPENAI_API_KEY"]!, "gpt-5-nano",
139+
var chat = new OpenAIChatClient(Configuration["OPENAI_API_KEY"]!, "gpt-5.2",
139140
OpenAIClientOptions.Observable(requests.Add).WriteTo(output));
140141

141142
var options = new ChatOptions
@@ -166,6 +167,45 @@ public async Task GPT5_ThinkingTime(ReasoningEffort effort)
166167
output.WriteLine($"Effort: {effort}, Time: {watch.ElapsedMilliseconds}ms, Tokens: {response.Usage?.TotalTokenCount}");
167168
}
168169

170+
[SecretsFact("OPENAI_API_KEY")]
171+
public async Task GPT5_NoReasoningTokens()
172+
{
173+
var requests = new List<JsonNode>();
174+
175+
//var chat = new OpenAIChatClient(Configuration["OPENAI_API_KEY"]!, "gpt-4o",
176+
// OpenAIClientOptions.Observable(requests.Add).WriteTo(output));
177+
178+
var chat = new OpenAIClient(new ApiKeyCredential(Configuration["OPENAI_API_KEY"]!),
179+
OpenAIClientOptions.Observable(requests.Add).WriteTo(output))
180+
.GetOpenAIResponseClient("gpt-4o")
181+
.AsIChatClient();
182+
183+
var reasoned = await chat.GetResponseAsync(
184+
"How much gold would it take to coat the Statue of Liberty in a 1mm layer?",
185+
new ChatOptions
186+
{
187+
ModelId = "gpt-5.1",
188+
ReasoningEffort = ReasoningEffort.Low
189+
}.ApplyExtensions());
190+
191+
Assert.StartsWith("gpt-5.1", reasoned.ModelId);
192+
Assert.NotNull(reasoned.Usage?.AdditionalCounts);
193+
Assert.True(reasoned.Usage.AdditionalCounts.ContainsKey("OutputTokenDetails.ReasoningTokenCount"));
194+
Assert.True(reasoned.Usage.AdditionalCounts["OutputTokenDetails.ReasoningTokenCount"] > 0);
195+
196+
var nonreasoned = await chat.GetResponseAsync(
197+
"How much gold would it take to coat the Statue of Liberty in a 1mm layer?",
198+
new ChatOptions
199+
{
200+
ModelId = "gpt-5.1",
201+
ReasoningEffort = ReasoningEffort.None
202+
}.ApplyExtensions());
203+
204+
Assert.NotNull(nonreasoned.Usage?.AdditionalCounts);
205+
Assert.True(nonreasoned.Usage.AdditionalCounts.ContainsKey("OutputTokenDetails.ReasoningTokenCount"));
206+
Assert.True(nonreasoned.Usage.AdditionalCounts["OutputTokenDetails.ReasoningTokenCount"] == 0);
207+
}
208+
169209
[SecretsTheory("OPENAI_API_KEY")]
170210
[InlineData(Verbosity.Low)]
171211
[InlineData(Verbosity.Medium)]

0 commit comments

Comments
 (0)