Skip to content

Commit 35ad864

Browse files
committed
Simplify approach to Grok chat client and dynamic clients
We want to provide the intuitive behavior of honoring the ChatOptions.ModelId, which the OpenAI client doesn't do. In order to achieve this, we implement the switching in the new GrokChatClient instead, and keep the GrokClient as a fallback for existing code or other non-chat scenarios where the same endpoints might work (but it's not guaranteed at all).
1 parent 76bdc92 commit 35ad864

File tree

7 files changed

+194
-103
lines changed

7 files changed

+194
-103
lines changed

src/AI.Tests/AI.Tests.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
4-
<TargetFrameworks>net8.0;net10.0</TargetFrameworks>
4+
<TargetFramework>net10.0</TargetFramework>
55
<NoWarn>OPENAI001;$(NoWarn)</NoWarn>
66
<EmitCompilerGeneratedFiles>true</EmitCompilerGeneratedFiles>
77
</PropertyGroup>

src/AI.Tests/GrokTests.cs

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,19 @@ public async Task GrokInvokesTools()
1616
{ "user", "What day is today?" },
1717
};
1818

19-
var grok = new GrokClient(Configuration["XAI_API_KEY"]!);
19+
var chat = new GrokChatClient(Configuration["XAI_API_KEY"]!);
2020

2121
var options = new GrokChatOptions
2222
{
2323
ModelId = "grok-3-mini",
2424
Search = GrokSearch.Auto,
25-
Tools = [AIFunctionFactory.Create(() => DateTimeOffset.Now.ToString("O"), "get_date")]
25+
Tools = [AIFunctionFactory.Create(() => DateTimeOffset.Now.ToString("O"), "get_date")],
26+
AdditionalProperties = new()
27+
{
28+
{ "foo", "bar" }
29+
}
2630
};
2731

28-
var client = grok.GetChatClient("grok-3");
29-
var chat = Assert.IsType<IChatClient>(client, false);
30-
3132
var response = await chat.GetResponseAsync(messages, options);
3233
var getdate = response.Messages
3334
.SelectMany(x => x.Contents.OfType<FunctionCallContent>())
@@ -50,9 +51,7 @@ public async Task GrokInvokesToolAndSearch()
5051

5152
var transport = new TestPipelineTransport(HttpClientPipelineTransport.Shared, output);
5253

53-
var grok = new GrokClient(Configuration["XAI_API_KEY"]!, new OpenAI.OpenAIClientOptions() { Transport = transport })
54-
.GetChatClient("grok-3")
55-
.AsIChatClient()
54+
var grok = new GrokChatClient(Configuration["XAI_API_KEY"]!, "grok-3", new OpenAI.OpenAIClientOptions() { Transport = transport })
5655
.AsBuilder()
5756
.UseFunctionInvocation()
5857
.Build();
@@ -103,9 +102,7 @@ public async Task GrokInvokesHostedSearchTool()
103102

104103
var transport = new TestPipelineTransport(HttpClientPipelineTransport.Shared, output);
105104

106-
var grok = new GrokClient(Configuration["XAI_API_KEY"]!, new OpenAI.OpenAIClientOptions() { Transport = transport });
107-
var client = grok.GetChatClient("grok-3");
108-
var chat = Assert.IsType<IChatClient>(client, false);
105+
var chat = new GrokChatClient(Configuration["XAI_API_KEY"]!, "grok-3", new OpenAI.OpenAIClientOptions() { Transport = transport });
109106

110107
var options = new ChatOptions
111108
{

src/AI/ChatExtensions.cs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,28 @@ public static class ChatExtensions
2020
public Task<ChatResponse> GetResponseAsync(Chat chat, ChatOptions? options = null, CancellationToken cancellation = default)
2121
=> client.GetResponseAsync((IEnumerable<ChatMessage>)chat, options, cancellation);
2222
}
23+
24+
extension(ChatOptions options)
25+
{
26+
/// <summary>
27+
/// Sets the effort level for a reasoning AI model when generating responses, if supported
28+
/// by the model.
29+
/// </summary>
30+
public ReasoningEffort? ReasoningEffort
31+
{
32+
get => options.AdditionalProperties?.TryGetValue("reasoning_effort", out var value) == true && value is ReasoningEffort effort ? effort : null;
33+
set
34+
{
35+
if (value is not null)
36+
{
37+
options.AdditionalProperties ??= [];
38+
options.AdditionalProperties["reasoning_effort"] = value;
39+
}
40+
else
41+
{
42+
options.AdditionalProperties?.Remove("reasoning_effort");
43+
}
44+
}
45+
}
46+
}
2347
}

src/AI/Grok/GrokChatClient.cs

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
using System.ClientModel;
2+
using System.ClientModel.Primitives;
3+
using System.Collections.Concurrent;
4+
using System.Text.Json;
5+
using Microsoft.Extensions.AI;
6+
using OpenAI;
7+
8+
namespace Devlooped.Extensions.AI;
9+
10+
/// <summary>
11+
/// An <see cref="IChatClient"/> implementation for Grok.
12+
/// </summary>
13+
public class GrokChatClient : IChatClient
14+
{
15+
readonly ConcurrentDictionary<string, IChatClient> clients = new();
16+
readonly string apiKey;
17+
readonly string modelId;
18+
readonly ClientPipeline pipeline;
19+
readonly OpenAIClientOptions options;
20+
21+
/// <summary>
22+
/// Initializes the client with the specified API key and the default model ID "grok-3-mini".
23+
/// </summary>
24+
public GrokChatClient(string apiKey) : this(apiKey, "grok-3-mini", null) { }
25+
26+
/// <summary>
27+
/// Initializes the client with the specified API key, model ID, and optional OpenAI client options.
28+
/// </summary>
29+
public GrokChatClient(string apiKey, string modelId, OpenAIClientOptions? options = default)
30+
{
31+
this.apiKey = apiKey;
32+
this.modelId = modelId;
33+
this.options = options ?? new();
34+
this.options.Endpoint ??= new Uri("https://api.x.ai/v1");
35+
36+
// NOTE: by caching the pipeline, we speed up creation of new chat clients per model,
37+
// since the pipeline will be the same for all of them.
38+
pipeline = new OpenAIClient(new ApiKeyCredential(apiKey), options).Pipeline;
39+
}
40+
41+
/// <inheritdoc/>
42+
public Task<ChatResponse> GetResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellation = default)
43+
=> GetChatClient(options?.ModelId ?? modelId).GetResponseAsync(messages, SetOptions(options), cancellation);
44+
45+
/// <inheritdoc/>
46+
public IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellation = default)
47+
=> GetChatClient(options?.ModelId ?? modelId).GetStreamingResponseAsync(messages, SetOptions(options), cancellation);
48+
49+
IChatClient GetChatClient(string modelId) => clients.GetOrAdd(modelId, model
50+
=> new PipelineClient(pipeline, options).GetChatClient(modelId).AsIChatClient());
51+
52+
static ChatOptions? SetOptions(ChatOptions? options)
53+
{
54+
if (options is null)
55+
return null;
56+
57+
options.RawRepresentationFactory = _ =>
58+
{
59+
var result = new GrokCompletionOptions();
60+
var grok = options as GrokChatOptions;
61+
var search = grok?.Search;
62+
63+
if (options.Tools != null)
64+
{
65+
if (options.Tools.OfType<GrokSearchTool>().FirstOrDefault() is GrokSearchTool grokSearch)
66+
search = grokSearch.Mode;
67+
else if (options.Tools.OfType<HostedWebSearchTool>().FirstOrDefault() is HostedWebSearchTool webSearch)
68+
search = GrokSearch.Auto;
69+
70+
// Grok doesn't support any other hosted search tools, so remove remaining ones
71+
// so they don't get copied over by the OpenAI client.
72+
//options.Tools = [.. options.Tools.Where(tool => tool is not HostedWebSearchTool)];
73+
}
74+
75+
if (search != null)
76+
result.Search = search.Value;
77+
78+
if (grok?.ReasoningEffort != null)
79+
{
80+
result.ReasoningEffortLevel = grok.ReasoningEffort switch
81+
{
82+
ReasoningEffort.Low => OpenAI.Chat.ChatReasoningEffortLevel.Low,
83+
ReasoningEffort.High => OpenAI.Chat.ChatReasoningEffortLevel.High,
84+
_ => throw new ArgumentException($"Unsupported reasoning effort {grok.ReasoningEffort}")
85+
};
86+
}
87+
88+
return result;
89+
};
90+
91+
return options;
92+
}
93+
94+
void IDisposable.Dispose() { }
95+
96+
public object? GetService(Type serviceType, object? serviceKey = null) => null;
97+
98+
// Allows creating the base OpenAIClient with a pre-created pipeline.
99+
class PipelineClient(ClientPipeline pipeline, OpenAIClientOptions options) : OpenAIClient(pipeline, options) { }
100+
101+
class GrokCompletionOptions : OpenAI.Chat.ChatCompletionOptions
102+
{
103+
public GrokSearch Search { get; set; } = GrokSearch.Auto;
104+
105+
protected override void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions? options)
106+
{
107+
base.JsonModelWriteCore(writer, options);
108+
109+
// "search_parameters": { "mode": "auto" }
110+
writer.WritePropertyName("search_parameters");
111+
writer.WriteStartObject();
112+
writer.WriteString("mode", Search.ToString().ToLowerInvariant());
113+
writer.WriteEndObject();
114+
}
115+
}
116+
}

src/AI/Grok/GrokClient.cs

Lines changed: 21 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,113 +1,45 @@
11
using System.ClientModel;
22
using System.ClientModel.Primitives;
33
using System.Collections.Concurrent;
4-
using System.Text.Json;
54
using Microsoft.Extensions.AI;
65
using OpenAI;
76

87
namespace Devlooped.Extensions.AI;
98

10-
public class GrokClient(string apiKey, OpenAIClientOptions options)
9+
/// <summary>
10+
/// Provides an OpenAI compatibility client for Grok. It's recommended you
11+
/// use <see cref="GrokChatClient"/> directly for chat-only scenarios.
12+
/// </summary>
13+
public class GrokClient(string apiKey, OpenAIClientOptions? options = null)
1114
: OpenAIClient(new ApiKeyCredential(apiKey), EnsureEndpoint(options))
1215
{
13-
// This allows ChatOptions to request a different model than the one configured
14-
// in the chat pipeline when GetChatClient(model).AsIChatClient() is called at registration time.
15-
readonly ConcurrentDictionary<string, GrokChatClientAdapter> adapters = new();
1616
readonly ConcurrentDictionary<string, IChatClient> clients = new();
1717

18-
public GrokClient(string apiKey)
19-
: this(apiKey, new())
20-
{
21-
}
18+
/// <summary>
19+
/// Initializes a new instance of the <see cref="GrokClient"/> with the specified API key.
20+
/// </summary>
21+
public GrokClient(string apiKey) : this(apiKey, new()) { }
2222

23-
IChatClient GetChatClientImpl(string model)
24-
// Gets the real chat client by prefixing so the overload invokes the base.
25-
=> clients.GetOrAdd(model, key => GetChatClient("__" + model).AsIChatClient());
23+
IChatClient GetChatClientImpl(string model) => clients.GetOrAdd(model, key => new GrokChatClient(apiKey, key, options));
2624

2725
/// <summary>
2826
/// Returns an adapter that surfaces an <see cref="IChatClient"/> interface that
2927
/// can be used directly in the <see cref="ChatClientBuilder"/> pipeline builder.
3028
/// </summary>
31-
public override OpenAI.Chat.ChatClient GetChatClient(string model)
32-
// We need to differentiate getting a real chat client vs an adapter for pipeline setup.
33-
// The former is invoked by the adapter when it needs to invoke the actual chat client,
34-
// which goes through the GetChatClientImpl. Since the method override is necessary to
35-
// satisfy the usage pattern when configuring OpenAIClient with M.E.AI, we differentiate
36-
// the internal call by adding a prefix we remove before calling downstream.
37-
=> model.StartsWith("__") ? base.GetChatClient(model[2..]) : new GrokChatClientAdapter(this, model);
38-
39-
static OpenAIClientOptions EnsureEndpoint(OpenAIClientOptions options)
40-
{
41-
if (options.Endpoint is null)
42-
options.Endpoint = new Uri("https://api.x.ai/v1");
43-
44-
return options;
45-
}
29+
public override OpenAI.Chat.ChatClient GetChatClient(string model) => new GrokChatClientAdapter(this, model);
4630

47-
static ChatOptions? SetOptions(ChatOptions? options)
31+
static OpenAIClientOptions EnsureEndpoint(OpenAIClientOptions? options)
4832
{
49-
if (options is null)
50-
return null;
51-
52-
options.RawRepresentationFactory = _ =>
53-
{
54-
var result = new GrokCompletionOptions();
55-
var grok = options as GrokChatOptions;
56-
var search = grok?.Search;
57-
58-
if (options.Tools != null)
59-
{
60-
if (options.Tools.OfType<GrokSearchTool>().FirstOrDefault() is GrokSearchTool grokSearch)
61-
search = grokSearch.Mode;
62-
else if (options.Tools.OfType<HostedWebSearchTool>().FirstOrDefault() is HostedWebSearchTool webSearch)
63-
search = GrokSearch.Auto;
64-
65-
// Grok doesn't support any other hosted search tools, so remove remaining ones
66-
// so they don't get copied over by the OpenAI client.
67-
//options.Tools = [.. options.Tools.Where(tool => tool is not HostedWebSearchTool)];
68-
}
69-
70-
if (search != null)
71-
result.Search = search.Value;
72-
73-
if (grok?.ReasoningEffort != null)
74-
{
75-
result.ReasoningEffortLevel = grok.ReasoningEffort switch
76-
{
77-
ReasoningEffort.Low => OpenAI.Chat.ChatReasoningEffortLevel.Low,
78-
ReasoningEffort.High => OpenAI.Chat.ChatReasoningEffortLevel.High,
79-
_ => throw new ArgumentException($"Unsupported reasoning effort {grok.ReasoningEffort}")
80-
};
81-
}
82-
83-
return result;
84-
};
85-
33+
options ??= new();
34+
options.Endpoint ??= new Uri("https://api.x.ai/v1");
8635
return options;
8736
}
8837

89-
class SearchParameters
90-
{
91-
public GrokSearch Mode { get; set; } = GrokSearch.Auto;
92-
}
93-
94-
class GrokCompletionOptions : OpenAI.Chat.ChatCompletionOptions
95-
{
96-
public GrokSearch Search { get; set; } = GrokSearch.Auto;
97-
98-
protected override void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions? options)
99-
{
100-
base.JsonModelWriteCore(writer, options);
101-
102-
// "search_parameters": { "mode": "auto" }
103-
writer.WritePropertyName("search_parameters");
104-
writer.WriteStartObject();
105-
writer.WriteString("mode", Search.ToString().ToLowerInvariant());
106-
writer.WriteEndObject();
107-
}
108-
}
109-
110-
public class GrokChatClientAdapter(GrokClient client, string model) : OpenAI.Chat.ChatClient, IChatClient
38+
// This adapter is provided for compatibility with the documented usage for
39+
// OpenAI in MEAI docs. The most typical case would be to just create a <see cref="GrokChatClient"/> directly.
40+
// This throws on any non-IChatClient invoked methods in the AsIChatClient adapter, and
41+
// forwards the IChatClient methods to the GrokChatClient implementation which is cached per client.
42+
class GrokChatClientAdapter(GrokClient client, string model) : OpenAI.Chat.ChatClient, IChatClient
11143
{
11244
void IDisposable.Dispose() { }
11345

@@ -118,14 +50,14 @@ void IDisposable.Dispose() { }
11850
/// the default model when the adapter was created.
11951
/// </summary>
12052
Task<ChatResponse> IChatClient.GetResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options, CancellationToken cancellation)
121-
=> client.GetChatClientImpl(options?.ModelId ?? model).GetResponseAsync(messages, SetOptions(options), cancellation);
53+
=> client.GetChatClientImpl(options?.ModelId ?? model).GetResponseAsync(messages, options, cancellation);
12254

12355
/// <summary>
12456
/// Routes the request to a client that matches the options' ModelId (if set), or
12557
/// the default model when the adapter was created.
12658
/// </summary>
12759
IAsyncEnumerable<ChatResponseUpdate> IChatClient.GetStreamingResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options, CancellationToken cancellation)
128-
=> client.GetChatClientImpl(options?.ModelId ?? model).GetStreamingResponseAsync(messages, SetOptions(options), cancellation);
60+
=> client.GetChatClientImpl(options?.ModelId ?? model).GetStreamingResponseAsync(messages, options, cancellation);
12961

13062
// These are the only two methods actually invoked by the AsIChatClient adapter from M.E.AI.OpenAI
13163
public override Task<ClientResult<OpenAI.Chat.ChatCompletion>> CompleteChatAsync(IEnumerable<OpenAI.Chat.ChatMessage>? messages, OpenAI.Chat.ChatCompletionOptions? options = null, CancellationToken cancellationToken = default)

src/AI/GrokExtensions.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
namespace Devlooped.Extensions.AI;
2+
3+
/// <summary>
4+
/// Provides Grok-specific extension methods.
5+
/// </summary>
6+
public static class GrokExtensions
7+
{
8+
}

src/AI/ReasoningEffort.cs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
11
namespace Devlooped.Extensions.AI;
22

33
/// <summary>
4-
/// Reasoning effort an AI should apply when generating a response.
4+
/// Effort a reasoning model should apply when generating a response.
55
/// </summary>
6-
public enum ReasoningEffort { Low, High }
6+
public enum ReasoningEffort
7+
{
8+
/// <summary>
9+
/// Low effort reasoning, which may result in faster responses but less detailed or accurate answers.
10+
/// </summary>
11+
Low,
12+
/// <summary>
13+
/// Medium effort reasoning. Grok in particular does not support this mode, so it will default to <see cref="Low"/>.
14+
/// </summary>
15+
Medium,
16+
/// <summary>
17+
/// High effort reasoning, which may take longer but provides more detailed and accurate responses.
18+
/// </summary>
19+
High
20+
}

0 commit comments

Comments
 (0)