diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/AIInferenceModelFactory.cs b/sdk/ai/Azure.AI.Inference/src/Generated/AIInferenceModelFactory.cs
index 8bd673de3aa2..6e375a158197 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/AIInferenceModelFactory.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/AIInferenceModelFactory.cs
@@ -92,22 +92,50 @@ public static ChatChoice ChatChoice(int index = default, CompletionsFinishReason
/// <param name="toolCalls">
/// The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
/// completions request to resolve as configured.
/// </param>
+ /// <param name="reasoningContent"> The reasoning content the model used for generating the response. </param>
/// <returns> A new <see cref="ChatResponseMessage"/> instance for mocking. </returns>
- public static ChatResponseMessage ChatResponseMessage(ChatRole role = default, string content = null, IEnumerable<ChatCompletionsToolCall> toolCalls = null)
+ public static ChatResponseMessage ChatResponseMessage(ChatRole role = default, string content = null, IEnumerable<ChatCompletionsToolCall> toolCalls = null, string reasoningContent = null)
{
toolCalls ??= new List<ChatCompletionsToolCall>();
- return new ChatResponseMessage(role, content, toolCalls?.ToList(), serializedAdditionalRawData: null);
+ return new ChatResponseMessage(role, content, toolCalls?.ToList(), reasoningContent, serializedAdditionalRawData: null);
}
/// <summary> Initializes a new instance of <see cref="CompletionsUsage"/>. </summary>
/// <param name="completionTokens"> The number of tokens generated across all completions emissions. </param>
/// <param name="promptTokens"> The number of tokens in the provided prompts for the completions request. </param>
/// <param name="totalTokens"> The total number of tokens processed for the completions request and response. </param>
+ /// <param name="completionTokensDetails"> Breakdown of tokens used in a completion. </param>
+ /// <param name="promptTokensDetails"> Breakdown of tokens used in the prompt/chat history. </param>
/// <returns> A new <see cref="CompletionsUsage"/> instance for mocking. </returns>
- public static CompletionsUsage CompletionsUsage(int completionTokens = default, int promptTokens = default, int totalTokens = default)
+ public static CompletionsUsage CompletionsUsage(int completionTokens = default, int promptTokens = default, int totalTokens = default, CompletionsUsageDetails completionTokensDetails = null, PromptUsageDetails promptTokensDetails = null)
{
- return new CompletionsUsage(completionTokens, promptTokens, totalTokens, serializedAdditionalRawData: null);
+ return new CompletionsUsage(
+ completionTokens,
+ promptTokens,
+ totalTokens,
+ completionTokensDetails,
+ promptTokensDetails,
+ serializedAdditionalRawData: null);
+ }
+
+ /// <summary> Initializes a new instance of <see cref="CompletionsUsageDetails"/>. </summary>
+ /// <param name="audioTokens"> The number of tokens corresponding to audio input. </param>
+ /// <param name="reasoningTokens"> The number of tokens corresponding to reasoning. </param>
+ /// <param name="totalTokens"> The total number of tokens processed for the completions request and response. </param>
+ /// <returns> A new <see cref="CompletionsUsageDetails"/> instance for mocking. </returns>
+ public static CompletionsUsageDetails CompletionsUsageDetails(int audioTokens = default, int reasoningTokens = default, int totalTokens = default)
+ {
+ return new CompletionsUsageDetails(audioTokens, reasoningTokens, totalTokens, serializedAdditionalRawData: null);
+ }
+
+ /// <summary> Initializes a new instance of <see cref="PromptUsageDetails"/>. </summary>
+ /// <param name="audioTokens"> The number of tokens corresponding to audio input. </param>
+ /// <param name="cachedTokens"> The total number of tokens cached. </param>
+ /// <returns> A new <see cref="PromptUsageDetails"/> instance for mocking. </returns>
+ public static PromptUsageDetails PromptUsageDetails(int audioTokens = default, int cachedTokens = default)
+ {
+ return new PromptUsageDetails(audioTokens, cachedTokens, serializedAdditionalRawData: null);
}
/// Initializes a new instance of .
@@ -213,12 +241,13 @@ public static StreamingChatChoiceUpdate StreamingChatChoiceUpdate(int index = de
/// <param name="toolCalls">
/// The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
/// completions request to resolve as configured.
/// </param>
+ /// <param name="reasoningContent"> The reasoning content the model used for generating the response. </param>
/// <returns> A new <see cref="StreamingChatResponseMessageUpdate"/> instance for mocking. </returns>
- public static StreamingChatResponseMessageUpdate StreamingChatResponseMessageUpdate(ChatRole? role = null, string content = null, IEnumerable<StreamingChatResponseToolCallUpdate> toolCalls = null)
+ public static StreamingChatResponseMessageUpdate StreamingChatResponseMessageUpdate(ChatRole? role = null, string content = null, IEnumerable<StreamingChatResponseToolCallUpdate> toolCalls = null, string reasoningContent = null)
{
toolCalls ??= new List<StreamingChatResponseToolCallUpdate>();
- return new StreamingChatResponseMessageUpdate(role, content, toolCalls?.ToList(), serializedAdditionalRawData: null);
+ return new StreamingChatResponseMessageUpdate(role, content, toolCalls?.ToList(), reasoningContent, serializedAdditionalRawData: null);
}
/// Initializes a new instance of .
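Taken together, the factory changes above let tests fabricate responses that carry reasoning content and the new usage breakdowns without calling the service. A minimal sketch; the literal values are illustrative only:

    // Mock an assistant message that exposes the model's reasoning.
    ChatResponseMessage mocked = AIInferenceModelFactory.ChatResponseMessage(
        role: ChatRole.Assistant,
        content: "The answer is 42.",
        reasoningContent: "Worked through the prompt step by step before answering.");

    // Mock a usage object with per-category token breakdowns.
    CompletionsUsage usage = AIInferenceModelFactory.CompletionsUsage(
        completionTokens: 120,
        promptTokens: 80,
        totalTokens: 200,
        completionTokensDetails: AIInferenceModelFactory.CompletionsUsageDetails(
            audioTokens: 0, reasoningTokens: 64, totalTokens: 120),
        promptTokensDetails: AIInferenceModelFactory.PromptUsageDetails(
            audioTokens: 0, cachedTokens: 32));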
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/AzureAIInferenceClientOptions.cs b/sdk/ai/Azure.AI.Inference/src/Generated/AzureAIInferenceClientOptions.cs
index 46dcb4716cad..19ad5ae20fdf 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/AzureAIInferenceClientOptions.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/AzureAIInferenceClientOptions.cs
@@ -13,13 +13,13 @@ namespace Azure.AI.Inference
/// <summary> Client options for Azure.AI.Inference library clients. </summary>
public partial class AzureAIInferenceClientOptions : ClientOptions
{
- private const ServiceVersion LatestVersion = ServiceVersion.V2024_05_01_Preview;
+ private const ServiceVersion LatestVersion = ServiceVersion.V2025_04_01;
/// <summary> The version of the service to use. </summary>
public enum ServiceVersion
{
- /// <summary> Service version "2024-05-01-preview". </summary>
- V2024_05_01_Preview = 1,
+ /// <summary> Service version "2025-04-01". </summary>
+ V2025_04_01 = 1,
}
internal string Version { get; }
@@ -29,7 +29,7 @@ public AzureAIInferenceClientOptions(ServiceVersion version = LatestVersion)
{
Version = version switch
{
- ServiceVersion.V2024_05_01_Preview => "2024-05-01-preview",
+ ServiceVersion.V2025_04_01 => "2025-04-01",
_ => throw new NotSupportedException()
};
}
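Because the enum value is replaced rather than added alongside the old one, code pinned to ServiceVersion.V2024_05_01_Preview will fail to compile after upgrading. A sketch of pinning to the new version; the endpoint and key are placeholders:

    var options = new AzureAIInferenceClientOptions(
        AzureAIInferenceClientOptions.ServiceVersion.V2025_04_01);
    var client = new ChatCompletionsClient(
        new Uri("https://your-endpoint.example"), new AzureKeyCredential("<key>"), options);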
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsClient.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsClient.cs
index 4bd774289ef0..cc3a010486ba 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsClient.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsClient.cs
@@ -20,7 +20,7 @@ public partial class ChatCompletionsClient
private const string AuthorizationHeader = "Authorization";
private readonly AzureKeyCredential _keyCredential;
private const string AuthorizationApiKeyPrefix = "Bearer";
- private static readonly string[] AuthorizationScopes = new string[] { "https://ml.azure.com/.default" };
+ private static readonly string[] AuthorizationScopes = new string[] { "https://cognitiveservices.azure.com/.default" };
private readonly TokenCredential _tokenCredential;
private readonly HttpPipeline _pipeline;
private readonly Uri _endpoint;
@@ -75,14 +75,15 @@ public ChatCompletionsClient(Uri endpoint, TokenCredential credential, AzureAIIn
/// <summary>
/// Returns information about the AI model.
/// The method makes a REST API call to the `/info` route on the given endpoint.
/// This method will only work when using Serverless API or Managed Compute endpoint.
- /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+ /// It will not work for Azure OpenAI endpoints.
/// </summary>
+ /// <param name="model"> The model deployment name you want information from. </param>
/// <param name="cancellationToken"> The cancellation token to use. </param>
- /// <include file="Docs/ChatCompletionsClient.xml" path="doc/members/member[@name=&quot;GetModelInfoAsync(CancellationToken)&quot;]/*" />
- public virtual async Task<Response<ModelInfo>> GetModelInfoAsync(CancellationToken cancellationToken = default)
+ /// <include file="Docs/ChatCompletionsClient.xml" path="doc/members/member[@name=&quot;GetModelInfoAsync(String,CancellationToken)&quot;]/*" />
+ public virtual async Task<Response<ModelInfo>> GetModelInfoAsync(string model = null, CancellationToken cancellationToken = default)
{
RequestContext context = FromCancellationToken(cancellationToken);
- Response response = await GetModelInfoAsync(context).ConfigureAwait(false);
+ Response response = await GetModelInfoAsync(model, context).ConfigureAwait(false);
return Response.FromValue(ModelInfo.FromResponse(response), response);
}
@@ -90,14 +91,15 @@ public virtual async Task> GetModelInfoAsync(CancellationTok
/// <summary>
/// Returns information about the AI model.
/// The method makes a REST API call to the `/info` route on the given endpoint.
/// This method will only work when using Serverless API or Managed Compute endpoint.
- /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+ /// It will not work for Azure OpenAI endpoints.
/// </summary>
+ /// <param name="model"> The model deployment name you want information from. </param>
/// <param name="cancellationToken"> The cancellation token to use. </param>
- /// <include file="Docs/ChatCompletionsClient.xml" path="doc/members/member[@name=&quot;GetModelInfo(CancellationToken)&quot;]/*" />
- public virtual Response<ModelInfo> GetModelInfo(CancellationToken cancellationToken = default)
+ /// <include file="Docs/ChatCompletionsClient.xml" path="doc/members/member[@name=&quot;GetModelInfo(String,CancellationToken)&quot;]/*" />
+ public virtual Response<ModelInfo> GetModelInfo(string model = null, CancellationToken cancellationToken = default)
{
RequestContext context = FromCancellationToken(cancellationToken);
- Response response = GetModelInfo(context);
+ Response response = GetModelInfo(model, context);
return Response.FromValue(ModelInfo.FromResponse(response), response);
}
@@ -105,7 +107,7 @@ public virtual Response GetModelInfo(CancellationToken cancellationTo
/// <summary>
/// [Protocol Method] Returns information about the AI model.
/// The method makes a REST API call to the `/info` route on the given endpoint.
/// This method will only work when using Serverless API or Managed Compute endpoint.
- /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+ /// It will not work for Azure OpenAI endpoints.
/// <list type="bullet">
/// <item>
/// <description>
@@ -114,22 +116,23 @@ public virtual Response GetModelInfo(CancellationToken cancellationTo
/// This <see href="https://aka.ms/azsdk/net/protocol/overloads">protocol method</see> allows explicit creation of the request and processing of the response for advanced scenarios.
/// </description>
/// </item>
/// <item>
/// <description>
- /// Please try the simpler <see cref="GetModelInfoAsync(CancellationToken)"/> convenience overload with strongly typed models first.
+ /// Please try the simpler <see cref="GetModelInfoAsync(string,CancellationToken)"/> convenience overload with strongly typed models first.
/// </description>
/// </item>
/// </list>
/// </summary>
+ /// <param name="model"> The model deployment name you want information from. </param>
/// <param name="context"> The request context, which can override default behaviors of the client pipeline on a per-call basis. </param>
/// <exception cref="RequestFailedException"> Service returned a non-success status code. </exception>
/// <returns> The response returned from the service. </returns>
- /// <include file="Docs/ChatCompletionsClient.xml" path="doc/members/member[@name=&quot;GetModelInfoAsync(RequestContext)&quot;]/*" />
- public virtual async Task<Response> GetModelInfoAsync(RequestContext context)
+ /// <include file="Docs/ChatCompletionsClient.xml" path="doc/members/member[@name=&quot;GetModelInfoAsync(String,RequestContext)&quot;]/*" />
+ public virtual async Task<Response> GetModelInfoAsync(string model, RequestContext context)
{
using var scope = ClientDiagnostics.CreateScope("ChatCompletionsClient.GetModelInfo");
scope.Start();
try
{
- using HttpMessage message = CreateGetModelInfoRequest(context);
+ using HttpMessage message = CreateGetModelInfoRequest(model, context);
return await _pipeline.ProcessMessageAsync(message, context).ConfigureAwait(false);
}
catch (Exception e)
@@ -143,7 +146,7 @@ public virtual async Task GetModelInfoAsync(RequestContext context)
/// <summary>
/// [Protocol Method] Returns information about the AI model.
/// The method makes a REST API call to the `/info` route on the given endpoint.
/// This method will only work when using Serverless API or Managed Compute endpoint.
- /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+ /// It will not work for Azure OpenAI endpoints.
/// <list type="bullet">
/// <item>
/// <description>
@@ -152,22 +155,23 @@ public virtual async Task GetModelInfoAsync(RequestContext context)
/// This <see href="https://aka.ms/azsdk/net/protocol/overloads">protocol method</see> allows explicit creation of the request and processing of the response for advanced scenarios.
/// </description>
/// </item>
/// <item>
/// <description>
- /// Please try the simpler <see cref="GetModelInfo(CancellationToken)"/> convenience overload with strongly typed models first.
+ /// Please try the simpler <see cref="GetModelInfo(string,CancellationToken)"/> convenience overload with strongly typed models first.
/// </description>
/// </item>
/// </list>
/// </summary>
+ /// <param name="model"> The model deployment name you want information from. </param>
/// <param name="context"> The request context, which can override default behaviors of the client pipeline on a per-call basis. </param>
/// <exception cref="RequestFailedException"> Service returned a non-success status code. </exception>
/// <returns> The response returned from the service. </returns>
- /// <include file="Docs/ChatCompletionsClient.xml" path="doc/members/member[@name=&quot;GetModelInfo(RequestContext)&quot;]/*" />
- public virtual Response GetModelInfo(RequestContext context)
+ /// <include file="Docs/ChatCompletionsClient.xml" path="doc/members/member[@name=&quot;GetModelInfo(String,RequestContext)&quot;]/*" />
+ public virtual Response GetModelInfo(string model, RequestContext context)
{
using var scope = ClientDiagnostics.CreateScope("ChatCompletionsClient.GetModelInfo");
scope.Start();
try
{
- using HttpMessage message = CreateGetModelInfoRequest(context);
+ using HttpMessage message = CreateGetModelInfoRequest(model, context);
return _pipeline.ProcessMessage(message, context);
}
catch (Exception e)
@@ -197,7 +201,7 @@ internal HttpMessage CreateCompleteRequest(RequestContent content, string extraP
return message;
}
- internal HttpMessage CreateGetModelInfoRequest(RequestContext context)
+ internal HttpMessage CreateGetModelInfoRequest(string model, RequestContext context)
{
var message = _pipeline.CreateMessage(context, ResponseClassifier200);
var request = message.Request;
@@ -206,6 +210,10 @@ internal HttpMessage CreateGetModelInfoRequest(RequestContext context)
uri.Reset(_endpoint);
uri.AppendPath("/info", false);
uri.AppendQuery("api-version", _apiVersion, true);
+ if (model != null)
+ {
+ uri.AppendQuery("model", model, true);
+ }
request.Uri = uri;
request.Headers.Add("Accept", "application/json");
return message;
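With the optional `model` argument, the `/info` call can be scoped to one deployment; when it is null the query parameter is omitted and the request shape is unchanged. A usage sketch, assuming `client` is already constructed and the deployment name is hypothetical:

    // Unchanged call: asks the endpoint about its default model.
    Response<ModelInfo> info = client.GetModelInfo();

    // New overload: the deployment name is sent as a `model` query parameter.
    Response<ModelInfo> deploymentInfo = client.GetModelInfo(model: "my-deployment");
    Console.WriteLine(deploymentInfo.Value.ModelName);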
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatResponseMessage.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatResponseMessage.Serialization.cs
index 610cf44dd156..18a5f47526df 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatResponseMessage.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatResponseMessage.Serialization.cs
@@ -55,6 +55,11 @@ protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWrit
}
writer.WriteEndArray();
}
+ if (options.Format != "W" && Optional.IsDefined(ReasoningContent))
+ {
+ writer.WritePropertyName("reasoning_content"u8);
+ writer.WriteStringValue(ReasoningContent);
+ }
if (options.Format != "W" && _serializedAdditionalRawData != null)
{
foreach (var item in _serializedAdditionalRawData)
@@ -95,6 +100,7 @@ internal static ChatResponseMessage DeserializeChatResponseMessage(JsonElement e
ChatRole role = default;
string content = default;
IReadOnlyList<ChatCompletionsToolCall> toolCalls = default;
+ string reasoningContent = default;
IDictionary<string, BinaryData> serializedAdditionalRawData = default;
Dictionary<string, BinaryData> rawDataDictionary = new Dictionary<string, BinaryData>();
foreach (var property in element.EnumerateObject())
@@ -128,13 +134,18 @@ internal static ChatResponseMessage DeserializeChatResponseMessage(JsonElement e
toolCalls = array;
continue;
}
+ if (property.NameEquals("reasoning_content"u8))
+ {
+ reasoningContent = property.Value.GetString();
+ continue;
+ }
if (options.Format != "W")
{
rawDataDictionary.Add(property.Name, BinaryData.FromString(property.Value.GetRawText()));
}
}
serializedAdditionalRawData = rawDataDictionary;
- return new ChatResponseMessage(role, content, toolCalls ?? new ChangeTrackingList<ChatCompletionsToolCall>(), serializedAdditionalRawData);
+ return new ChatResponseMessage(role, content, toolCalls ?? new ChangeTrackingList<ChatCompletionsToolCall>(), reasoningContent, serializedAdditionalRawData);
}
BinaryData IPersistableModel<ChatResponseMessage>.Write(ModelReaderWriterOptions options)
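For reference, a payload like the following exercises the new `reasoning_content` branch of the deserializer; the JSON values are made up, and `ModelReaderWriter` is the System.ClientModel entry point for reading persistable models:

    string json = "{ \"role\": \"assistant\", \"content\": \"42\", \"reasoning_content\": \"eliminated the other options first\" }";
    ChatResponseMessage message = ModelReaderWriter.Read<ChatResponseMessage>(BinaryData.FromString(json));
    Console.WriteLine(message.ReasoningContent);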
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatResponseMessage.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatResponseMessage.cs
index 6ba151e01adb..0ddd55f6c798 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatResponseMessage.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatResponseMessage.cs
@@ -62,12 +62,14 @@ internal ChatResponseMessage(ChatRole role, string content)
/// <param name="toolCalls">
/// The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
/// completions request to resolve as configured.
/// </param>
+ /// <param name="reasoningContent"> The reasoning content the model used for generating the response. </param>
/// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
- internal ChatResponseMessage(ChatRole role, string content, IReadOnlyList<ChatCompletionsToolCall> toolCalls, IDictionary<string, BinaryData> serializedAdditionalRawData)
+ internal ChatResponseMessage(ChatRole role, string content, IReadOnlyList<ChatCompletionsToolCall> toolCalls, string reasoningContent, IDictionary<string, BinaryData> serializedAdditionalRawData)
{
Role = role;
Content = content;
ToolCalls = toolCalls;
+ ReasoningContent = reasoningContent;
_serializedAdditionalRawData = serializedAdditionalRawData;
}
@@ -85,5 +87,7 @@ internal ChatResponseMessage()
/// completions request to resolve as configured.
/// </summary>
public IReadOnlyList<ChatCompletionsToolCall> ToolCalls { get; }
+ /// <summary> The reasoning content the model used for generating the response. </summary>
+ public string ReasoningContent { get; }
}
}
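On the convenience surface this lands as the read-only ReasoningContent property, which stays null for models that emit no reasoning. A small sketch, assuming `message` is the ChatResponseMessage from a completed chat call:

    if (message.ReasoningContent is string reasoning)
    {
        Console.WriteLine($"reasoning: {reasoning}");
    }
    Console.WriteLine($"answer: {message.Content}");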
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsage.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsage.Serialization.cs
index b98b9ffc340a..11418dbf6166 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsage.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsage.Serialization.cs
@@ -40,6 +40,16 @@ protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWrit
writer.WriteNumberValue(PromptTokens);
writer.WritePropertyName("total_tokens"u8);
writer.WriteNumberValue(TotalTokens);
+ if (Optional.IsDefined(CompletionTokensDetails))
+ {
+ writer.WritePropertyName("completion_tokens_details"u8);
+ writer.WriteObjectValue(CompletionTokensDetails, options);
+ }
+ if (Optional.IsDefined(PromptTokensDetails))
+ {
+ writer.WritePropertyName("prompt_tokens_details"u8);
+ writer.WriteObjectValue(PromptTokensDetails, options);
+ }
if (options.Format != "W" && _serializedAdditionalRawData != null)
{
foreach (var item in _serializedAdditionalRawData)
@@ -80,6 +90,8 @@ internal static CompletionsUsage DeserializeCompletionsUsage(JsonElement element
int completionTokens = default;
int promptTokens = default;
int totalTokens = default;
+ CompletionsUsageDetails completionTokensDetails = default;
+ PromptUsageDetails promptTokensDetails = default;
IDictionary<string, BinaryData> serializedAdditionalRawData = default;
Dictionary<string, BinaryData> rawDataDictionary = new Dictionary<string, BinaryData>();
foreach (var property in element.EnumerateObject())
@@ -99,13 +111,37 @@ internal static CompletionsUsage DeserializeCompletionsUsage(JsonElement element
totalTokens = property.Value.GetInt32();
continue;
}
+ if (property.NameEquals("completion_tokens_details"u8))
+ {
+ if (property.Value.ValueKind == JsonValueKind.Null)
+ {
+ continue;
+ }
+ completionTokensDetails = CompletionsUsageDetails.DeserializeCompletionsUsageDetails(property.Value, options);
+ continue;
+ }
+ if (property.NameEquals("prompt_tokens_details"u8))
+ {
+ if (property.Value.ValueKind == JsonValueKind.Null)
+ {
+ continue;
+ }
+ promptTokensDetails = PromptUsageDetails.DeserializePromptUsageDetails(property.Value, options);
+ continue;
+ }
if (options.Format != "W")
{
rawDataDictionary.Add(property.Name, BinaryData.FromString(property.Value.GetRawText()));
}
}
serializedAdditionalRawData = rawDataDictionary;
- return new CompletionsUsage(completionTokens, promptTokens, totalTokens, serializedAdditionalRawData);
+ return new CompletionsUsage(
+ completionTokens,
+ promptTokens,
+ totalTokens,
+ completionTokensDetails,
+ promptTokensDetails,
+ serializedAdditionalRawData);
}
BinaryData IPersistableModel<CompletionsUsage>.Write(ModelReaderWriterOptions options)
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsage.cs b/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsage.cs
index b6affee7b206..066b89663732 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsage.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsage.cs
@@ -64,12 +64,16 @@ internal CompletionsUsage(int completionTokens, int promptTokens, int totalToken
/// <param name="completionTokens"> The number of tokens generated across all completions emissions. </param>
/// <param name="promptTokens"> The number of tokens in the provided prompts for the completions request. </param>
/// <param name="totalTokens"> The total number of tokens processed for the completions request and response. </param>
+ /// <param name="completionTokensDetails"> Breakdown of tokens used in a completion. </param>
+ /// <param name="promptTokensDetails"> Breakdown of tokens used in the prompt/chat history. </param>
/// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
- internal CompletionsUsage(int completionTokens, int promptTokens, int totalTokens, IDictionary<string, BinaryData> serializedAdditionalRawData)
+ internal CompletionsUsage(int completionTokens, int promptTokens, int totalTokens, CompletionsUsageDetails completionTokensDetails, PromptUsageDetails promptTokensDetails, IDictionary<string, BinaryData> serializedAdditionalRawData)
{
CompletionTokens = completionTokens;
PromptTokens = promptTokens;
TotalTokens = totalTokens;
+ CompletionTokensDetails = completionTokensDetails;
+ PromptTokensDetails = promptTokensDetails;
_serializedAdditionalRawData = serializedAdditionalRawData;
}
@@ -84,5 +88,9 @@ internal CompletionsUsage()
public int PromptTokens { get; }
/// <summary> The total number of tokens processed for the completions request and response. </summary>
public int TotalTokens { get; }
+ /// <summary> Breakdown of tokens used in a completion. </summary>
+ public CompletionsUsageDetails CompletionTokensDetails { get; }
+ /// <summary> Breakdown of tokens used in the prompt/chat history. </summary>
+ public PromptUsageDetails PromptTokensDetails { get; }
}
}
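Both detail objects are optional on the wire, so the new properties can be null when an older service omits them. A defensive read, assuming `usage` is the CompletionsUsage from a response:

    Console.WriteLine($"total: {usage.TotalTokens}");
    if (usage.CompletionTokensDetails is { } completionDetails)
    {
        Console.WriteLine($"reasoning tokens: {completionDetails.ReasoningTokens}");
    }
    if (usage.PromptTokensDetails is { } promptDetails)
    {
        Console.WriteLine($"cached prompt tokens: {promptDetails.CachedTokens}");
    }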
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsageDetails.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsageDetails.Serialization.cs
new file mode 100644
index 000000000000..e4a4e1f5b8fa
--- /dev/null
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsageDetails.Serialization.cs
@@ -0,0 +1,167 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// <auto-generated/>
+
+#nullable disable
+
+using System;
+using System.ClientModel.Primitives;
+using System.Collections.Generic;
+using System.Text.Json;
+using Azure.Core;
+
+namespace Azure.AI.Inference
+{
+ public partial class CompletionsUsageDetails : IUtf8JsonSerializable, IJsonModel<CompletionsUsageDetails>
+ {
+ void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<CompletionsUsageDetails>)this).Write(writer, ModelSerializationExtensions.WireOptions);
+
+ void IJsonModel<CompletionsUsageDetails>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
+ {
+ writer.WriteStartObject();
+ JsonModelWriteCore(writer, options);
+ writer.WriteEndObject();
+ }
+
+ /// <param name="writer"> The JSON writer. </param>
+ /// <param name="options"> The client options for reading and writing models. </param>
+ protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions options)
+ {
+ var format = options.Format == "W" ? ((IPersistableModel<CompletionsUsageDetails>)this).GetFormatFromOptions(options) : options.Format;
+ if (format != "J")
+ {
+ throw new FormatException($"The model {nameof(CompletionsUsageDetails)} does not support writing '{format}' format.");
+ }
+
+ if (options.Format != "W")
+ {
+ writer.WritePropertyName("audio_tokens"u8);
+ writer.WriteNumberValue(AudioTokens);
+ }
+ if (options.Format != "W")
+ {
+ writer.WritePropertyName("reasoning_tokens"u8);
+ writer.WriteNumberValue(ReasoningTokens);
+ }
+ if (options.Format != "W")
+ {
+ writer.WritePropertyName("total_tokens"u8);
+ writer.WriteNumberValue(TotalTokens);
+ }
+ if (options.Format != "W" && _serializedAdditionalRawData != null)
+ {
+ foreach (var item in _serializedAdditionalRawData)
+ {
+ writer.WritePropertyName(item.Key);
+#if NET6_0_OR_GREATER
+ writer.WriteRawValue(item.Value);
+#else
+ using (JsonDocument document = JsonDocument.Parse(item.Value, ModelSerializationExtensions.JsonDocumentOptions))
+ {
+ JsonSerializer.Serialize(writer, document.RootElement);
+ }
+#endif
+ }
+ }
+ }
+
+ CompletionsUsageDetails IJsonModel<CompletionsUsageDetails>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
+ {
+ var format = options.Format == "W" ? ((IPersistableModel<CompletionsUsageDetails>)this).GetFormatFromOptions(options) : options.Format;
+ if (format != "J")
+ {
+ throw new FormatException($"The model {nameof(CompletionsUsageDetails)} does not support reading '{format}' format.");
+ }
+
+ using JsonDocument document = JsonDocument.ParseValue(ref reader);
+ return DeserializeCompletionsUsageDetails(document.RootElement, options);
+ }
+
+ internal static CompletionsUsageDetails DeserializeCompletionsUsageDetails(JsonElement element, ModelReaderWriterOptions options = null)
+ {
+ options ??= ModelSerializationExtensions.WireOptions;
+
+ if (element.ValueKind == JsonValueKind.Null)
+ {
+ return null;
+ }
+ int audioTokens = default;
+ int reasoningTokens = default;
+ int totalTokens = default;
+ IDictionary<string, BinaryData> serializedAdditionalRawData = default;
+ Dictionary<string, BinaryData> rawDataDictionary = new Dictionary<string, BinaryData>();
+ foreach (var property in element.EnumerateObject())
+ {
+ if (property.NameEquals("audio_tokens"u8))
+ {
+ audioTokens = property.Value.GetInt32();
+ continue;
+ }
+ if (property.NameEquals("reasoning_tokens"u8))
+ {
+ reasoningTokens = property.Value.GetInt32();
+ continue;
+ }
+ if (property.NameEquals("total_tokens"u8))
+ {
+ totalTokens = property.Value.GetInt32();
+ continue;
+ }
+ if (options.Format != "W")
+ {
+ rawDataDictionary.Add(property.Name, BinaryData.FromString(property.Value.GetRawText()));
+ }
+ }
+ serializedAdditionalRawData = rawDataDictionary;
+ return new CompletionsUsageDetails(audioTokens, reasoningTokens, totalTokens, serializedAdditionalRawData);
+ }
+
+ BinaryData IPersistableModel<CompletionsUsageDetails>.Write(ModelReaderWriterOptions options)
+ {
+ var format = options.Format == "W" ? ((IPersistableModel<CompletionsUsageDetails>)this).GetFormatFromOptions(options) : options.Format;
+
+ switch (format)
+ {
+ case "J":
+ return ModelReaderWriter.Write(this, options);
+ default:
+ throw new FormatException($"The model {nameof(CompletionsUsageDetails)} does not support writing '{options.Format}' format.");
+ }
+ }
+
+ CompletionsUsageDetails IPersistableModel<CompletionsUsageDetails>.Create(BinaryData data, ModelReaderWriterOptions options)
+ {
+ var format = options.Format == "W" ? ((IPersistableModel<CompletionsUsageDetails>)this).GetFormatFromOptions(options) : options.Format;
+
+ switch (format)
+ {
+ case "J":
+ {
+ using JsonDocument document = JsonDocument.Parse(data, ModelSerializationExtensions.JsonDocumentOptions);
+ return DeserializeCompletionsUsageDetails(document.RootElement, options);
+ }
+ default:
+ throw new FormatException($"The model {nameof(CompletionsUsageDetails)} does not support reading '{options.Format}' format.");
+ }
+ }
+
+ string IPersistableModel<CompletionsUsageDetails>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J";
+
+ /// <summary> Deserializes the model from a raw response. </summary>
+ /// <param name="response"> The response to deserialize the model from. </param>
+ internal static CompletionsUsageDetails FromResponse(Response response)
+ {
+ using var document = JsonDocument.Parse(response.Content, ModelSerializationExtensions.JsonDocumentOptions);
+ return DeserializeCompletionsUsageDetails(document.RootElement);
+ }
+
+ /// <summary> Convert into a <see cref="RequestContent"/>. </summary>
+ internal virtual RequestContent ToRequestContent()
+ {
+ var content = new Utf8JsonRequestContent();
+ content.JsonWriter.WriteObjectValue(this, ModelSerializationExtensions.WireOptions);
+ return content;
+ }
+ }
+}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsageDetails.cs b/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsageDetails.cs
new file mode 100644
index 000000000000..7ad4f9c10b1e
--- /dev/null
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/CompletionsUsageDetails.cs
@@ -0,0 +1,73 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// <auto-generated/>
+
+#nullable disable
+
+using System;
+using System.Collections.Generic;
+
+namespace Azure.AI.Inference
+{
+ /// <summary> A breakdown of tokens used in a completion. </summary>
+ public partial class CompletionsUsageDetails
+ {
+ /// <summary>
+ /// Keeps track of any properties unknown to the library.
+ /// <para>
+ /// To assign an object to the value of this property use <see cref="BinaryData.FromObjectAsJson{T}(T, System.Text.Json.JsonSerializerOptions?)"/>.
+ /// </para>
+ /// <para>
+ /// To assign an already formatted json string to this property use <see cref="BinaryData.FromString(string)"/>.
+ /// </para>
+ /// <para>
+ /// Examples:
+ /// <list type="bullet">
+ /// <item>
+ /// <term>BinaryData.FromObjectAsJson("foo")</term>
+ /// <description>Creates a payload of "foo".</description>
+ /// </item>
+ /// <item>
+ /// <term>BinaryData.FromString("\"foo\"")</term>
+ /// <description>Creates a payload of "foo".</description>
+ /// </item>
+ /// <item>
+ /// <term>BinaryData.FromObjectAsJson(new { key = "value" })</term>
+ /// <description>Creates a payload of { "key": "value" }.</description>
+ /// </item>
+ /// <item>
+ /// <term>BinaryData.FromString("{\"key\": \"value\"}")</term>
+ /// <description>Creates a payload of { "key": "value" }.</description>
+ /// </item>
+ /// </list>
+ /// </para>
+ /// </summary>
+ private IDictionary<string, BinaryData> _serializedAdditionalRawData;
+
+ /// <summary> Initializes a new instance of <see cref="CompletionsUsageDetails"/>. </summary>
+ internal CompletionsUsageDetails()
+ {
+ }
+
+ /// <summary> Initializes a new instance of <see cref="CompletionsUsageDetails"/>. </summary>
+ /// <param name="audioTokens"> The number of tokens corresponding to audio input. </param>
+ /// <param name="reasoningTokens"> The number of tokens corresponding to reasoning. </param>
+ /// <param name="totalTokens"> The total number of tokens processed for the completions request and response. </param>
+ /// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
+ internal CompletionsUsageDetails(int audioTokens, int reasoningTokens, int totalTokens, IDictionary<string, BinaryData> serializedAdditionalRawData)
+ {
+ AudioTokens = audioTokens;
+ ReasoningTokens = reasoningTokens;
+ TotalTokens = totalTokens;
+ _serializedAdditionalRawData = serializedAdditionalRawData;
+ }
+
+ /// <summary> The number of tokens corresponding to audio input. </summary>
+ public int AudioTokens { get; }
+ /// <summary> The number of tokens corresponding to reasoning. </summary>
+ public int ReasoningTokens { get; }
+ /// <summary> The total number of tokens processed for the completions request and response. </summary>
+ public int TotalTokens { get; }
+ }
+}
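A quick round-trip of the snake_case wire format the serializers above read; the numbers are invented:

    string json = "{ \"completion_tokens\": 120, \"prompt_tokens\": 80, \"total_tokens\": 200,"
        + " \"completion_tokens_details\": { \"audio_tokens\": 0, \"reasoning_tokens\": 64, \"total_tokens\": 120 },"
        + " \"prompt_tokens_details\": { \"audio_tokens\": 0, \"cached_tokens\": 32 } }";
    CompletionsUsage usage = ModelReaderWriter.Read<CompletionsUsage>(BinaryData.FromString(json));
    Console.WriteLine(usage.CompletionTokensDetails.ReasoningTokens); // 64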
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/Docs/ChatCompletionsClient.xml b/sdk/ai/Azure.AI.Inference/src/Generated/Docs/ChatCompletionsClient.xml
index 572d47b762f2..0ea9e9e5c48f 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/Docs/ChatCompletionsClient.xml
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/Docs/ChatCompletionsClient.xml
@@ -1,7 +1,7 @@
-    <member name="GetModelInfoAsync(CancellationToken)">
+    <member name="GetModelInfoAsync(String,CancellationToken)">
      <example>
This sample shows how to call GetModelInfoAsync.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);

Response<ModelInfo> response = await client.GetModelInfoAsync();
]]></code>
-This sample shows how to call GetModelInfoAsync.
+This sample shows how to call GetModelInfoAsync with all parameters.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);

-Response<ModelInfo> response = await client.GetModelInfoAsync();
+Response<ModelInfo> response = await client.GetModelInfoAsync(model: "<model>");
]]></code>
      </example>
    </member>
-    <member name="GetModelInfo(CancellationToken)">
+    <member name="GetModelInfo(String,CancellationToken)">
      <example>
This sample shows how to call GetModelInfo.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);

Response<ModelInfo> response = client.GetModelInfo();
]]></code>
-This sample shows how to call GetModelInfo.
+This sample shows how to call GetModelInfo with all parameters.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);

-Response<ModelInfo> response = client.GetModelInfo();
+Response<ModelInfo> response = client.GetModelInfo(model: "<model>");
]]></code>
      </example>
    </member>
-    <member name="GetModelInfoAsync(RequestContext)">
+    <member name="GetModelInfoAsync(String,RequestContext)">
      <example>
This sample shows how to call GetModelInfoAsync and parse the result.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);

-Response response = await client.GetModelInfoAsync(null);
+Response response = await client.GetModelInfoAsync(null, null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
Console.WriteLine(result.GetProperty("model_type").ToString());
Console.WriteLine(result.GetProperty("model_provider_name").ToString());
]]></code>
-This sample shows how to call GetModelInfoAsync and parse the result.
+This sample shows how to call GetModelInfoAsync with all parameters and parse the result.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);

-Response response = await client.GetModelInfoAsync(null);
+Response response = await client.GetModelInfoAsync("<model>", null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -68,7 +68,7 @@ Console.WriteLine(result.GetProperty("model_type").ToString());
Console.WriteLine(result.GetProperty("model_provider_name").ToString());
]]></code>
      </example>
    </member>
-    <member name="GetModelInfo(RequestContext)">
+    <member name="GetModelInfo(String,RequestContext)">
      <example>
This sample shows how to call GetModelInfo and parse the result.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);

-Response response = client.GetModelInfo(null);
+Response response = client.GetModelInfo(null, null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
Console.WriteLine(result.GetProperty("model_type").ToString());
Console.WriteLine(result.GetProperty("model_provider_name").ToString());
]]></code>
-This sample shows how to call GetModelInfo and parse the result.
+This sample shows how to call GetModelInfo with all parameters and parse the result.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);

-Response response = client.GetModelInfo(null);
+Response response = client.GetModelInfo("<model>", null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/Docs/EmbeddingsClient.xml b/sdk/ai/Azure.AI.Inference/src/Generated/Docs/EmbeddingsClient.xml
index 54f0260f0b6b..f63c4d5026ba 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/Docs/EmbeddingsClient.xml
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/Docs/EmbeddingsClient.xml
@@ -1,7 +1,7 @@
-    <member name="GetModelInfoAsync(CancellationToken)">
+    <member name="GetModelInfoAsync(String,CancellationToken)">
      <example>
This sample shows how to call GetModelInfoAsync.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);

Response<ModelInfo> response = await client.GetModelInfoAsync();
]]></code>
-This sample shows how to call GetModelInfoAsync.
+This sample shows how to call GetModelInfoAsync with all parameters.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);

-Response<ModelInfo> response = await client.GetModelInfoAsync();
+Response<ModelInfo> response = await client.GetModelInfoAsync(model: "<model>");
]]></code>
      </example>
    </member>
-    <member name="GetModelInfo(CancellationToken)">
+    <member name="GetModelInfo(String,CancellationToken)">
      <example>
This sample shows how to call GetModelInfo.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);

Response<ModelInfo> response = client.GetModelInfo();
]]></code>
-This sample shows how to call GetModelInfo.
+This sample shows how to call GetModelInfo with all parameters.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);

-Response<ModelInfo> response = client.GetModelInfo();
+Response<ModelInfo> response = client.GetModelInfo(model: "<model>");
]]></code>
      </example>
    </member>
-    <member name="GetModelInfoAsync(RequestContext)">
+    <member name="GetModelInfoAsync(String,RequestContext)">
      <example>
This sample shows how to call GetModelInfoAsync and parse the result.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);

-Response response = await client.GetModelInfoAsync(null);
+Response response = await client.GetModelInfoAsync(null, null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
Console.WriteLine(result.GetProperty("model_type").ToString());
Console.WriteLine(result.GetProperty("model_provider_name").ToString());
]]></code>
-This sample shows how to call GetModelInfoAsync and parse the result.
+This sample shows how to call GetModelInfoAsync with all parameters and parse the result.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);

-Response response = await client.GetModelInfoAsync(null);
+Response response = await client.GetModelInfoAsync("<model>", null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -68,7 +68,7 @@ Console.WriteLine(result.GetProperty("model_type").ToString());
Console.WriteLine(result.GetProperty("model_provider_name").ToString());
]]></code>
      </example>
    </member>
-    <member name="GetModelInfo(RequestContext)">
+    <member name="GetModelInfo(String,RequestContext)">
      <example>
This sample shows how to call GetModelInfo and parse the result.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);

-Response response = client.GetModelInfo(null);
+Response response = client.GetModelInfo(null, null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
Console.WriteLine(result.GetProperty("model_type").ToString());
Console.WriteLine(result.GetProperty("model_provider_name").ToString());
]]></code>
-This sample shows how to call GetModelInfo and parse the result.
+This sample shows how to call GetModelInfo with all parameters and parse the result.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);

-Response response = client.GetModelInfo(null);
+Response response = client.GetModelInfo("<model>", null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/Docs/ImageEmbeddingsClient.xml b/sdk/ai/Azure.AI.Inference/src/Generated/Docs/ImageEmbeddingsClient.xml
index 280a6ad03aa7..52996dbd7b6a 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/Docs/ImageEmbeddingsClient.xml
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/Docs/ImageEmbeddingsClient.xml
@@ -1,7 +1,7 @@
-    <member name="GetModelInfoAsync(CancellationToken)">
+    <member name="GetModelInfoAsync(String,CancellationToken)">
      <example>
This sample shows how to call GetModelInfoAsync.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);

Response<ModelInfo> response = await client.GetModelInfoAsync();
]]></code>
-This sample shows how to call GetModelInfoAsync.
+This sample shows how to call GetModelInfoAsync with all parameters.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);

-Response<ModelInfo> response = await client.GetModelInfoAsync();
+Response<ModelInfo> response = await client.GetModelInfoAsync(model: "<model>");
]]></code>
      </example>
    </member>
-    <member name="GetModelInfo(CancellationToken)">
+    <member name="GetModelInfo(String,CancellationToken)">
      <example>
This sample shows how to call GetModelInfo.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);

Response<ModelInfo> response = client.GetModelInfo();
]]></code>
-This sample shows how to call GetModelInfo.
+This sample shows how to call GetModelInfo with all parameters.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);

-Response<ModelInfo> response = client.GetModelInfo();
+Response<ModelInfo> response = client.GetModelInfo(model: "<model>");
]]></code>
      </example>
    </member>
-    <member name="GetModelInfoAsync(RequestContext)">
+    <member name="GetModelInfoAsync(String,RequestContext)">
      <example>
This sample shows how to call GetModelInfoAsync and parse the result.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);

-Response response = await client.GetModelInfoAsync(null);
+Response response = await client.GetModelInfoAsync(null, null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
Console.WriteLine(result.GetProperty("model_type").ToString());
Console.WriteLine(result.GetProperty("model_provider_name").ToString());
]]></code>
-This sample shows how to call GetModelInfoAsync and parse the result.
+This sample shows how to call GetModelInfoAsync with all parameters and parse the result.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);

-Response response = await client.GetModelInfoAsync(null);
+Response response = await client.GetModelInfoAsync("<model>", null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -68,7 +68,7 @@ Console.WriteLine(result.GetProperty("model_type").ToString());
Console.WriteLine(result.GetProperty("model_provider_name").ToString());
]]></code>
      </example>
    </member>
-    <member name="GetModelInfo(RequestContext)">
+    <member name="GetModelInfo(String,RequestContext)">
      <example>
This sample shows how to call GetModelInfo and parse the result.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);

-Response response = client.GetModelInfo(null);
+Response response = client.GetModelInfo(null, null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
Console.WriteLine(result.GetProperty("model_type").ToString());
Console.WriteLine(result.GetProperty("model_provider_name").ToString());
]]></code>
-This sample shows how to call GetModelInfo and parse the result.
+This sample shows how to call GetModelInfo with all parameters and parse the result.
<code><![CDATA[
Uri endpoint = new Uri("<https://my-service.azure.com>");
AzureKeyCredential credential = new AzureKeyCredential("<key>");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);

-Response response = client.GetModelInfo(null);
+Response response = client.GetModelInfo("<model>", null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsClient.cs b/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsClient.cs
index 2b2f1bed6a53..2eab5b867a07 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsClient.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsClient.cs
@@ -20,7 +20,7 @@ public partial class EmbeddingsClient
private const string AuthorizationHeader = "Authorization";
private readonly AzureKeyCredential _keyCredential;
private const string AuthorizationApiKeyPrefix = "Bearer";
- private static readonly string[] AuthorizationScopes = new string[] { "https://ml.azure.com/.default" };
+ private static readonly string[] AuthorizationScopes = new string[] { "https://cognitiveservices.azure.com/.default" };
private readonly TokenCredential _tokenCredential;
private readonly HttpPipeline _pipeline;
private readonly Uri _endpoint;
@@ -75,14 +75,15 @@ public EmbeddingsClient(Uri endpoint, TokenCredential credential, AzureAIInferen
/// <summary>
/// Returns information about the AI model.
/// The method makes a REST API call to the `/info` route on the given endpoint.
/// This method will only work when using Serverless API or Managed Compute endpoint.
- /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+ /// It will not work for Azure OpenAI endpoints.
/// </summary>
+ /// <param name="model"> The model deployment name you want information from. </param>
/// <param name="cancellationToken"> The cancellation token to use. </param>
- /// <include file="Docs/EmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfoAsync(CancellationToken)&quot;]/*" />
- public virtual async Task<Response<ModelInfo>> GetModelInfoAsync(CancellationToken cancellationToken = default)
+ /// <include file="Docs/EmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfoAsync(String,CancellationToken)&quot;]/*" />
+ public virtual async Task<Response<ModelInfo>> GetModelInfoAsync(string model = null, CancellationToken cancellationToken = default)
{
RequestContext context = FromCancellationToken(cancellationToken);
- Response response = await GetModelInfoAsync(context).ConfigureAwait(false);
+ Response response = await GetModelInfoAsync(model, context).ConfigureAwait(false);
return Response.FromValue(ModelInfo.FromResponse(response), response);
}
@@ -90,14 +91,15 @@ public virtual async Task> GetModelInfoAsync(CancellationTok
/// <summary>
/// Returns information about the AI model.
/// The method makes a REST API call to the `/info` route on the given endpoint.
/// This method will only work when using Serverless API or Managed Compute endpoint.
- /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+ /// It will not work for Azure OpenAI endpoints.
/// </summary>
+ /// <param name="model"> The model deployment name you want information from. </param>
/// <param name="cancellationToken"> The cancellation token to use. </param>
- /// <include file="Docs/EmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfo(CancellationToken)&quot;]/*" />
- public virtual Response<ModelInfo> GetModelInfo(CancellationToken cancellationToken = default)
+ /// <include file="Docs/EmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfo(String,CancellationToken)&quot;]/*" />
+ public virtual Response<ModelInfo> GetModelInfo(string model = null, CancellationToken cancellationToken = default)
{
RequestContext context = FromCancellationToken(cancellationToken);
- Response response = GetModelInfo(context);
+ Response response = GetModelInfo(model, context);
return Response.FromValue(ModelInfo.FromResponse(response), response);
}
@@ -105,7 +107,7 @@ public virtual Response GetModelInfo(CancellationToken cancellationTo
/// <summary>
/// [Protocol Method] Returns information about the AI model.
/// The method makes a REST API call to the `/info` route on the given endpoint.
/// This method will only work when using Serverless API or Managed Compute endpoint.
- /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+ /// It will not work for Azure OpenAI endpoints.
/// <list type="bullet">
/// <item>
/// <description>
@@ -114,22 +116,23 @@ public virtual Response GetModelInfo(CancellationToken cancellationTo
/// This <see href="https://aka.ms/azsdk/net/protocol/overloads">protocol method</see> allows explicit creation of the request and processing of the response for advanced scenarios.
/// </description>
/// </item>
/// <item>
/// <description>
- /// Please try the simpler <see cref="GetModelInfoAsync(CancellationToken)"/> convenience overload with strongly typed models first.
+ /// Please try the simpler <see cref="GetModelInfoAsync(string,CancellationToken)"/> convenience overload with strongly typed models first.
/// </description>
/// </item>
/// </list>
/// </summary>
+ /// <param name="model"> The model deployment name you want information from. </param>
/// <param name="context"> The request context, which can override default behaviors of the client pipeline on a per-call basis. </param>
/// <exception cref="RequestFailedException"> Service returned a non-success status code. </exception>
/// <returns> The response returned from the service. </returns>
- /// <include file="Docs/EmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfoAsync(RequestContext)&quot;]/*" />
- public virtual async Task<Response> GetModelInfoAsync(RequestContext context)
+ /// <include file="Docs/EmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfoAsync(String,RequestContext)&quot;]/*" />
+ public virtual async Task<Response> GetModelInfoAsync(string model, RequestContext context)
{
using var scope = ClientDiagnostics.CreateScope("EmbeddingsClient.GetModelInfo");
scope.Start();
try
{
- using HttpMessage message = CreateGetModelInfoRequest(context);
+ using HttpMessage message = CreateGetModelInfoRequest(model, context);
return await _pipeline.ProcessMessageAsync(message, context).ConfigureAwait(false);
}
catch (Exception e)
@@ -143,7 +146,7 @@ public virtual async Task GetModelInfoAsync(RequestContext context)
/// <summary>
/// [Protocol Method] Returns information about the AI model.
/// The method makes a REST API call to the `/info` route on the given endpoint.
/// This method will only work when using Serverless API or Managed Compute endpoint.
- /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+ /// It will not work for Azure OpenAI endpoints.
/// <list type="bullet">
/// <item>
/// <description>
@@ -152,22 +155,23 @@ public virtual async Task GetModelInfoAsync(RequestContext context)
/// This <see href="https://aka.ms/azsdk/net/protocol/overloads">protocol method</see> allows explicit creation of the request and processing of the response for advanced scenarios.
/// </description>
/// </item>
/// <item>
/// <description>
- /// Please try the simpler <see cref="GetModelInfo(CancellationToken)"/> convenience overload with strongly typed models first.
+ /// Please try the simpler <see cref="GetModelInfo(string,CancellationToken)"/> convenience overload with strongly typed models first.
/// </description>
/// </item>
/// </list>
/// </summary>
+ /// <param name="model"> The model deployment name you want information from. </param>
/// <param name="context"> The request context, which can override default behaviors of the client pipeline on a per-call basis. </param>
/// <exception cref="RequestFailedException"> Service returned a non-success status code. </exception>
/// <returns> The response returned from the service. </returns>
- /// <include file="Docs/EmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfo(RequestContext)&quot;]/*" />
- public virtual Response GetModelInfo(RequestContext context)
+ /// <include file="Docs/EmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfo(String,RequestContext)&quot;]/*" />
+ public virtual Response GetModelInfo(string model, RequestContext context)
{
using var scope = ClientDiagnostics.CreateScope("EmbeddingsClient.GetModelInfo");
scope.Start();
try
{
- using HttpMessage message = CreateGetModelInfoRequest(context);
+ using HttpMessage message = CreateGetModelInfoRequest(model, context);
return _pipeline.ProcessMessage(message, context);
}
catch (Exception e)
@@ -197,7 +201,7 @@ internal HttpMessage CreateEmbedRequest(RequestContent content, string extraPara
return message;
}
- internal HttpMessage CreateGetModelInfoRequest(RequestContext context)
+ internal HttpMessage CreateGetModelInfoRequest(string model, RequestContext context)
{
var message = _pipeline.CreateMessage(context, ResponseClassifier200);
var request = message.Request;
@@ -206,6 +210,10 @@ internal HttpMessage CreateGetModelInfoRequest(RequestContext context)
uri.Reset(_endpoint);
uri.AppendPath("/info", false);
uri.AppendQuery("api-version", _apiVersion, true);
+ if (model != null)
+ {
+ uri.AppendQuery("model", model, true);
+ }
request.Uri = uri;
request.Headers.Add("Accept", "application/json");
return message;
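The scope swap above applies to all three clients: Entra ID tokens are now requested for https://cognitiveservices.azure.com/.default rather than the Azure ML scope, so role assignments granted only on the old resource may need updating. A sketch, assuming Azure.Identity supplies the credential:

    var client = new EmbeddingsClient(
        new Uri("https://your-endpoint.example"),
        new DefaultAzureCredential());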
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ImageEmbeddingsClient.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ImageEmbeddingsClient.cs
index 9bf443cc4dcf..fb67907e0061 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ImageEmbeddingsClient.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ImageEmbeddingsClient.cs
@@ -20,7 +20,7 @@ public partial class ImageEmbeddingsClient
private const string AuthorizationHeader = "Authorization";
private readonly AzureKeyCredential _keyCredential;
private const string AuthorizationApiKeyPrefix = "Bearer";
- private static readonly string[] AuthorizationScopes = new string[] { "https://ml.azure.com/.default" };
+ private static readonly string[] AuthorizationScopes = new string[] { "https://cognitiveservices.azure.com/.default" };
private readonly TokenCredential _tokenCredential;
private readonly HttpPipeline _pipeline;
private readonly Uri _endpoint;
@@ -75,14 +75,15 @@ public ImageEmbeddingsClient(Uri endpoint, TokenCredential credential, AzureAIIn
/// <summary>
/// Returns information about the AI model.
/// The method makes a REST API call to the `/info` route on the given endpoint.
/// This method will only work when using Serverless API or Managed Compute endpoint.
- /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+ /// It will not work for Azure OpenAI endpoints.
/// </summary>
+ /// <param name="model"> The model deployment name you want information from. </param>
/// <param name="cancellationToken"> The cancellation token to use. </param>
- /// <include file="Docs/ImageEmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfoAsync(CancellationToken)&quot;]/*" />
- public virtual async Task<Response<ModelInfo>> GetModelInfoAsync(CancellationToken cancellationToken = default)
+ /// <include file="Docs/ImageEmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfoAsync(String,CancellationToken)&quot;]/*" />
+ public virtual async Task<Response<ModelInfo>> GetModelInfoAsync(string model = null, CancellationToken cancellationToken = default)
{
RequestContext context = FromCancellationToken(cancellationToken);
- Response response = await GetModelInfoAsync(context).ConfigureAwait(false);
+ Response response = await GetModelInfoAsync(model, context).ConfigureAwait(false);
return Response.FromValue(ModelInfo.FromResponse(response), response);
}
@@ -90,14 +91,15 @@ public virtual async Task> GetModelInfoAsync(CancellationTok
/// <summary>
/// Returns information about the AI model.
/// The method makes a REST API call to the `/info` route on the given endpoint.
/// This method will only work when using Serverless API or Managed Compute endpoint.
- /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+ /// It will not work for Azure OpenAI endpoints.
/// </summary>
+ /// <param name="model"> The model deployment name you want information from. </param>
/// <param name="cancellationToken"> The cancellation token to use. </param>
- /// <include file="Docs/ImageEmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfo(CancellationToken)&quot;]/*" />
- public virtual Response<ModelInfo> GetModelInfo(CancellationToken cancellationToken = default)
+ /// <include file="Docs/ImageEmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfo(String,CancellationToken)&quot;]/*" />
+ public virtual Response<ModelInfo> GetModelInfo(string model = null, CancellationToken cancellationToken = default)
{
RequestContext context = FromCancellationToken(cancellationToken);
- Response response = GetModelInfo(context);
+ Response response = GetModelInfo(model, context);
return Response.FromValue(ModelInfo.FromResponse(response), response);
}
@@ -105,7 +107,7 @@ public virtual Response GetModelInfo(CancellationToken cancellationTo
/// <summary>
/// [Protocol Method] Returns information about the AI model.
/// The method makes a REST API call to the `/info` route on the given endpoint.
/// This method will only work when using Serverless API or Managed Compute endpoint.
- /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+ /// It will not work for Azure OpenAI endpoints.
/// <list type="bullet">
/// <item>
/// <description>
@@ -114,22 +116,23 @@ public virtual Response GetModelInfo(CancellationToken cancellationTo
/// This <see href="https://aka.ms/azsdk/net/protocol/overloads">protocol method</see> allows explicit creation of the request and processing of the response for advanced scenarios.
/// </description>
/// </item>
/// <item>
/// <description>
- /// Please try the simpler <see cref="GetModelInfoAsync(CancellationToken)"/> convenience overload with strongly typed models first.
+ /// Please try the simpler <see cref="GetModelInfoAsync(string,CancellationToken)"/> convenience overload with strongly typed models first.
/// </description>
/// </item>
/// </list>
/// </summary>
+ /// <param name="model"> The model deployment name you want information from. </param>
/// <param name="context"> The request context, which can override default behaviors of the client pipeline on a per-call basis. </param>
/// <exception cref="RequestFailedException"> Service returned a non-success status code. </exception>
/// <returns> The response returned from the service. </returns>
- /// <include file="Docs/ImageEmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfoAsync(RequestContext)&quot;]/*" />
- public virtual async Task<Response> GetModelInfoAsync(RequestContext context)
+ /// <include file="Docs/ImageEmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfoAsync(String,RequestContext)&quot;]/*" />
+ public virtual async Task<Response> GetModelInfoAsync(string model, RequestContext context)
{
using var scope = ClientDiagnostics.CreateScope("ImageEmbeddingsClient.GetModelInfo");
scope.Start();
try
{
- using HttpMessage message = CreateGetModelInfoRequest(context);
+ using HttpMessage message = CreateGetModelInfoRequest(model, context);
return await _pipeline.ProcessMessageAsync(message, context).ConfigureAwait(false);
}
catch (Exception e)
@@ -143,7 +146,7 @@ public virtual async Task GetModelInfoAsync(RequestContext context)
/// <summary>
/// [Protocol Method] Returns information about the AI model.
/// The method makes a REST API call to the `/info` route on the given endpoint.
/// This method will only work when using Serverless API or Managed Compute endpoint.
- /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+ /// It will not work for Azure OpenAI endpoints.
/// <list type="bullet">
/// <item>
/// <description>
@@ -152,22 +155,23 @@ public virtual async Task GetModelInfoAsync(RequestContext context)
/// This <see href="https://aka.ms/azsdk/net/protocol/overloads">protocol method</see> allows explicit creation of the request and processing of the response for advanced scenarios.
/// </description>
/// </item>
/// <item>
/// <description>
- /// Please try the simpler <see cref="GetModelInfo(CancellationToken)"/> convenience overload with strongly typed models first.
+ /// Please try the simpler <see cref="GetModelInfo(string,CancellationToken)"/> convenience overload with strongly typed models first.
/// </description>
/// </item>
/// </list>
/// </summary>
+ /// <param name="model"> The model deployment name you want information from. </param>
/// <param name="context"> The request context, which can override default behaviors of the client pipeline on a per-call basis. </param>
/// <exception cref="RequestFailedException"> Service returned a non-success status code. </exception>
/// <returns> The response returned from the service. </returns>
- /// <include file="Docs/ImageEmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfo(RequestContext)&quot;]/*" />
- public virtual Response GetModelInfo(RequestContext context)
+ /// <include file="Docs/ImageEmbeddingsClient.xml" path="doc/members/member[@name=&quot;GetModelInfo(String,RequestContext)&quot;]/*" />
+ public virtual Response GetModelInfo(string model, RequestContext context)
{
using var scope = ClientDiagnostics.CreateScope("ImageEmbeddingsClient.GetModelInfo");
scope.Start();
try
{
- using HttpMessage message = CreateGetModelInfoRequest(context);
+ using HttpMessage message = CreateGetModelInfoRequest(model, context);
return _pipeline.ProcessMessage(message, context);
}
catch (Exception e)
@@ -197,7 +201,7 @@ internal HttpMessage CreateEmbedRequest(RequestContent content, string extraPara
return message;
}
- internal HttpMessage CreateGetModelInfoRequest(RequestContext context)
+ internal HttpMessage CreateGetModelInfoRequest(string model, RequestContext context)
{
var message = _pipeline.CreateMessage(context, ResponseClassifier200);
var request = message.Request;
@@ -206,6 +210,10 @@ internal HttpMessage CreateGetModelInfoRequest(RequestContext context)
uri.Reset(_endpoint);
uri.AppendPath("/info", false);
uri.AppendQuery("api-version", _apiVersion, true);
+ if (model != null)
+ {
+ uri.AppendQuery("model", model, true);
+ }
request.Uri = uri;
request.Headers.Add("Accept", "application/json");
return message;
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/PromptUsageDetails.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/PromptUsageDetails.Serialization.cs
new file mode 100644
index 000000000000..f24f05e83c6e
--- /dev/null
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/PromptUsageDetails.Serialization.cs
@@ -0,0 +1,156 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// <auto-generated/>
+
+#nullable disable
+
+using System;
+using System.ClientModel.Primitives;
+using System.Collections.Generic;
+using System.Text.Json;
+using Azure.Core;
+
+namespace Azure.AI.Inference
+{
+ public partial class PromptUsageDetails : IUtf8JsonSerializable, IJsonModel<PromptUsageDetails>
+ {
+ void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<PromptUsageDetails>)this).Write(writer, ModelSerializationExtensions.WireOptions);
+
+ void IJsonModel<PromptUsageDetails>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
+ {
+ writer.WriteStartObject();
+ JsonModelWriteCore(writer, options);
+ writer.WriteEndObject();
+ }
+
+ /// <param name="writer"> The JSON writer. </param>
+ /// <param name="options"> The client options for reading and writing models. </param>
+ protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions options)
+ {
+ var format = options.Format == "W" ? ((IPersistableModel<PromptUsageDetails>)this).GetFormatFromOptions(options) : options.Format;
+ if (format != "J")
+ {
+ throw new FormatException($"The model {nameof(PromptUsageDetails)} does not support writing '{format}' format.");
+ }
+
+ if (options.Format != "W")
+ {
+ writer.WritePropertyName("audio_tokens"u8);
+ writer.WriteNumberValue(AudioTokens);
+ }
+ if (options.Format != "W")
+ {
+ writer.WritePropertyName("cached_tokens"u8);
+ writer.WriteNumberValue(CachedTokens);
+ }
+ if (options.Format != "W" && _serializedAdditionalRawData != null)
+ {
+ foreach (var item in _serializedAdditionalRawData)
+ {
+ writer.WritePropertyName(item.Key);
+#if NET6_0_OR_GREATER
+ writer.WriteRawValue(item.Value);
+#else
+ using (JsonDocument document = JsonDocument.Parse(item.Value, ModelSerializationExtensions.JsonDocumentOptions))
+ {
+ JsonSerializer.Serialize(writer, document.RootElement);
+ }
+#endif
+ }
+ }
+ }
+
+ PromptUsageDetails IJsonModel<PromptUsageDetails>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
+ {
+ var format = options.Format == "W" ? ((IPersistableModel<PromptUsageDetails>)this).GetFormatFromOptions(options) : options.Format;
+ if (format != "J")
+ {
+ throw new FormatException($"The model {nameof(PromptUsageDetails)} does not support reading '{format}' format.");
+ }
+
+ using JsonDocument document = JsonDocument.ParseValue(ref reader);
+ return DeserializePromptUsageDetails(document.RootElement, options);
+ }
+
+ internal static PromptUsageDetails DeserializePromptUsageDetails(JsonElement element, ModelReaderWriterOptions options = null)
+ {
+ options ??= ModelSerializationExtensions.WireOptions;
+
+ if (element.ValueKind == JsonValueKind.Null)
+ {
+ return null;
+ }
+ int audioTokens = default;
+ int cachedTokens = default;
+ IDictionary<string, BinaryData> serializedAdditionalRawData = default;
+ Dictionary<string, BinaryData> rawDataDictionary = new Dictionary<string, BinaryData>();
+ foreach (var property in element.EnumerateObject())
+ {
+ if (property.NameEquals("audio_tokens"u8))
+ {
+ audioTokens = property.Value.GetInt32();
+ continue;
+ }
+ if (property.NameEquals("cached_tokens"u8))
+ {
+ cachedTokens = property.Value.GetInt32();
+ continue;
+ }
+ if (options.Format != "W")
+ {
+ rawDataDictionary.Add(property.Name, BinaryData.FromString(property.Value.GetRawText()));
+ }
+ }
+ serializedAdditionalRawData = rawDataDictionary;
+ return new PromptUsageDetails(audioTokens, cachedTokens, serializedAdditionalRawData);
+ }
+
+ BinaryData IPersistableModel<PromptUsageDetails>.Write(ModelReaderWriterOptions options)
+ {
+ var format = options.Format == "W" ? ((IPersistableModel<PromptUsageDetails>)this).GetFormatFromOptions(options) : options.Format;
+
+ switch (format)
+ {
+ case "J":
+ return ModelReaderWriter.Write(this, options);
+ default:
+ throw new FormatException($"The model {nameof(PromptUsageDetails)} does not support writing '{options.Format}' format.");
+ }
+ }
+
+ PromptUsageDetails IPersistableModel<PromptUsageDetails>.Create(BinaryData data, ModelReaderWriterOptions options)
+ {
+ var format = options.Format == "W" ? ((IPersistableModel<PromptUsageDetails>)this).GetFormatFromOptions(options) : options.Format;
+
+ switch (format)
+ {
+ case "J":
+ {
+ using JsonDocument document = JsonDocument.Parse(data, ModelSerializationExtensions.JsonDocumentOptions);
+ return DeserializePromptUsageDetails(document.RootElement, options);
+ }
+ default:
+ throw new FormatException($"The model {nameof(PromptUsageDetails)} does not support reading '{options.Format}' format.");
+ }
+ }
+
+ string IPersistableModel<PromptUsageDetails>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J";
+
+ /// <summary> Deserializes the model from a raw response. </summary>
+ /// <param name="response"> The response to deserialize the model from. </param>
+ internal static PromptUsageDetails FromResponse(Response response)
+ {
+ using var document = JsonDocument.Parse(response.Content, ModelSerializationExtensions.JsonDocumentOptions);
+ return DeserializePromptUsageDetails(document.RootElement);
+ }
+
+ /// <summary> Convert into a <see cref="RequestContent"/>. </summary>
+ internal virtual RequestContent ToRequestContent()
+ {
+ var content = new Utf8JsonRequestContent();
+ content.JsonWriter.WriteObjectValue(this, ModelSerializationExtensions.WireOptions);
+ return content;
+ }
+ }
+}
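
The IJsonModel<PromptUsageDetails>/IPersistableModel<PromptUsageDetails> members above are what let the new model round-trip through ModelReaderWriter. A minimal sketch of that round trip, assuming a hand-written payload (the token counts are invented; the wire names come from the hunk above):

    using System;
    using System.ClientModel.Primitives;
    using Azure.AI.Inference;

    BinaryData json = BinaryData.FromString("{\"audio_tokens\": 12, \"cached_tokens\": 256}");
    // Read dispatches to DeserializePromptUsageDetails via the IJsonModel implementation above.
    PromptUsageDetails details = ModelReaderWriter.Read<PromptUsageDetails>(json);
    Console.WriteLine($"{details.AudioTokens} audio / {details.CachedTokens} cached");
    // Write goes back through JsonModelWriteCore; unknown properties captured during
    // deserialization are re-emitted because the default format is "J", not "W".
    BinaryData roundTripped = ModelReaderWriter.Write(details);
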
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/PromptUsageDetails.cs b/sdk/ai/Azure.AI.Inference/src/Generated/PromptUsageDetails.cs
new file mode 100644
index 000000000000..c5da6f3623b1
--- /dev/null
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/PromptUsageDetails.cs
@@ -0,0 +1,69 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// <auto-generated/>
+
+#nullable disable
+
+using System;
+using System.Collections.Generic;
+
+namespace Azure.AI.Inference
+{
+ /// <summary> A breakdown of tokens used in the prompt/chat history. </summary>
+ public partial class PromptUsageDetails
+ {
+ /// <summary>
+ /// Keeps track of any properties unknown to the library.
+ /// <para>
+ /// To assign an object to the value of this property use <see cref="BinaryData.FromObjectAsJson{T}(T, System.Text.Json.JsonSerializerOptions?)"/>.
+ /// </para>
+ /// <para>
+ /// To assign an already formatted json string to this property use <see cref="BinaryData.FromString(string)"/>.
+ /// </para>
+ /// <para>
+ /// Examples:
+ /// <list type="bullet">
+ /// <item>
+ /// <term>BinaryData.FromObjectAsJson("foo")</term>
+ /// <description>Creates a payload of "foo".</description>
+ /// </item>
+ /// <item>
+ /// <term>BinaryData.FromString("\"foo\"")</term>
+ /// <description>Creates a payload of "foo".</description>
+ /// </item>
+ /// <item>
+ /// <term>BinaryData.FromObjectAsJson(new { key = "value" })</term>
+ /// <description>Creates a payload of { "key": "value" }.</description>
+ /// </item>
+ /// <item>
+ /// <term>BinaryData.FromString("{\"key\": \"value\"}")</term>
+ /// <description>Creates a payload of { "key": "value" }.</description>
+ /// </item>
+ /// </list>
+ /// </para>
+ /// </summary>
+ private IDictionary<string, BinaryData> _serializedAdditionalRawData;
+
+ /// <summary> Initializes a new instance of <see cref="PromptUsageDetails"/>. </summary>
+ internal PromptUsageDetails()
+ {
+ }
+
+ /// <summary> Initializes a new instance of <see cref="PromptUsageDetails"/>. </summary>
+ /// <param name="audioTokens"> The number of tokens corresponding to audio input. </param>
+ /// <param name="cachedTokens"> The total number of tokens cached. </param>
+ /// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
+ internal PromptUsageDetails(int audioTokens, int cachedTokens, IDictionary<string, BinaryData> serializedAdditionalRawData)
+ {
+ AudioTokens = audioTokens;
+ CachedTokens = cachedTokens;
+ _serializedAdditionalRawData = serializedAdditionalRawData;
+ }
+
+ /// <summary> The number of tokens corresponding to audio input. </summary>
+ public int AudioTokens { get; }
+ /// <summary> The total number of tokens cached. </summary>
+ public int CachedTokens { get; }
+ }
+}
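
Both constructors are internal and the properties are get-only, so tests cannot instantiate the type directly; the AIInferenceModelFactory overload added earlier in this diff is the intended seam for mocking. A short sketch (values invented; NUnit assumed, matching the test files below):

    using Azure.AI.Inference;
    using NUnit.Framework;

    // Build a read-only PromptUsageDetails without touching the internal constructors.
    PromptUsageDetails details = AIInferenceModelFactory.PromptUsageDetails(
        audioTokens: 0,
        cachedTokens: 1024);
    Assert.AreEqual(1024, details.CachedTokens);
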
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatResponseMessageUpdate.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatResponseMessageUpdate.Serialization.cs
index cd6eb8f18aad..f86291cd2e2f 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatResponseMessageUpdate.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatResponseMessageUpdate.Serialization.cs
@@ -53,6 +53,11 @@ protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWrit
}
writer.WriteEndArray();
}
+ if (options.Format != "W" && Optional.IsDefined(ReasoningContent))
+ {
+ writer.WritePropertyName("reasoning_content"u8);
+ writer.WriteStringValue(ReasoningContent);
+ }
if (options.Format != "W" && _serializedAdditionalRawData != null)
{
foreach (var item in _serializedAdditionalRawData)
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatResponseMessageUpdate.cs b/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatResponseMessageUpdate.cs
index 7d6808846e19..2a7c5972f96b 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatResponseMessageUpdate.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatResponseMessageUpdate.cs
@@ -58,12 +58,14 @@ internal StreamingChatResponseMessageUpdate()
/// <param name="toolCalls">
/// The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
/// completions request to resolve as configured.
/// </param>
+ /// <param name="reasoningContent"> The reasoning content the model used for generating the response. </param>
/// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
- internal StreamingChatResponseMessageUpdate(ChatRole? role, string content, IReadOnlyList<StreamingChatResponseToolCallUpdate> toolCalls, IDictionary<string, BinaryData> serializedAdditionalRawData)
+ internal StreamingChatResponseMessageUpdate(ChatRole? role, string content, IReadOnlyList<StreamingChatResponseToolCallUpdate> toolCalls, string reasoningContent, IDictionary<string, BinaryData> serializedAdditionalRawData)
{
Role = role;
Content = content;
ToolCalls = toolCalls;
+ ReasoningContent = reasoningContent;
_serializedAdditionalRawData = serializedAdditionalRawData;
}
@@ -71,5 +73,7 @@ internal StreamingChatResponseMessageUpdate(ChatRole? role, string content, IRea
public ChatRole? Role { get; }
/// <summary> The content of the message. </summary>
public string Content { get; }
+ /// <summary> The reasoning content the model used for generating the response. </summary>
+ public string ReasoningContent { get; }
}
}
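
With the property and constructor parameter in place, a streamed delta that carries reasoning_content surfaces it on ReasoningContent. A hedged sketch, assuming the matching deserialization hunk (not shown in this excerpt) was regenerated alongside the write path above, and with an invented payload standing in for one SSE delta:

    using System;
    using System.ClientModel.Primitives;
    using Azure.AI.Inference;

    BinaryData delta = BinaryData.FromString(
        "{\"role\":\"assistant\",\"content\":\"4\",\"reasoning_content\":\"2 + 2 = 4\"}");
    StreamingChatResponseMessageUpdate update =
        ModelReaderWriter.Read<StreamingChatResponseMessageUpdate>(delta);
    Console.WriteLine(update.ReasoningContent); // "2 + 2 = 4"
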
diff --git a/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_ChatCompletionsClient.cs b/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_ChatCompletionsClient.cs
index b52f3fc3727f..c840c7c6c435 100644
--- a/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_ChatCompletionsClient.cs
+++ b/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_ChatCompletionsClient.cs
@@ -17,13 +17,13 @@ public partial class Samples_ChatCompletionsClient
{
[Test]
[Ignore("Only validating compilation of examples")]
- public void Example_Client1_GetModelInfo_MaximumSetModelInformation()
+ public void Example_Client1_GetModelInfo_ShortVersion()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);
- Response response = client.GetModelInfo(null);
+ Response response = client.GetModelInfo(null, null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -33,13 +33,13 @@ public void Example_Client1_GetModelInfo_MaximumSetModelInformation()
[Test]
[Ignore("Only validating compilation of examples")]
- public async Task Example_Client1_GetModelInfo_MaximumSetModelInformation_Async()
+ public async Task Example_Client1_GetModelInfo_ShortVersion_Async()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);
- Response response = await client.GetModelInfoAsync(null);
+ Response response = await client.GetModelInfoAsync(null, null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -49,7 +49,7 @@ public async Task Example_Client1_GetModelInfo_MaximumSetModelInformation_Async(
[Test]
[Ignore("Only validating compilation of examples")]
- public void Example_Client1_GetModelInfo_MaximumSetModelInformation_Convenience()
+ public void Example_Client1_GetModelInfo_ShortVersion_Convenience()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
@@ -60,7 +60,7 @@ public void Example_Client1_GetModelInfo_MaximumSetModelInformation_Convenience(
[Test]
[Ignore("Only validating compilation of examples")]
- public async Task Example_Client1_GetModelInfo_MaximumSetModelInformation_Convenience_Async()
+ public async Task Example_Client1_GetModelInfo_ShortVersion_Convenience_Async()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
@@ -71,13 +71,13 @@ public async Task Example_Client1_GetModelInfo_MaximumSetModelInformation_Conven
[Test]
[Ignore("Only validating compilation of examples")]
- public void Example_Client1_GetModelInfo_MinimumSetModelInformation()
+ public void Example_Client1_GetModelInfo_AllParameters()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);
- Response response = client.GetModelInfo(null);
+ Response response = client.GetModelInfo("", null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -87,13 +87,13 @@ public void Example_Client1_GetModelInfo_MinimumSetModelInformation()
[Test]
[Ignore("Only validating compilation of examples")]
- public async Task Example_Client1_GetModelInfo_MinimumSetModelInformation_Async()
+ public async Task Example_Client1_GetModelInfo_AllParameters_Async()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);
- Response response = await client.GetModelInfoAsync(null);
+ Response response = await client.GetModelInfoAsync("", null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -103,24 +103,24 @@ public async Task Example_Client1_GetModelInfo_MinimumSetModelInformation_Async(
[Test]
[Ignore("Only validating compilation of examples")]
- public void Example_Client1_GetModelInfo_MinimumSetModelInformation_Convenience()
+ public void Example_Client1_GetModelInfo_AllParameters_Convenience()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);
- Response<ModelInfo> response = client.GetModelInfo();
+ Response<ModelInfo> response = client.GetModelInfo(model: "");
}
[Test]
[Ignore("Only validating compilation of examples")]
- public async Task Example_Client1_GetModelInfo_MinimumSetModelInformation_Convenience_Async()
+ public async Task Example_Client1_GetModelInfo_AllParameters_Convenience_Async()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
ChatCompletionsClient client = new ChatCompletionsClient(endpoint, credential);
- Response<ModelInfo> response = await client.GetModelInfoAsync();
+ Response<ModelInfo> response = await client.GetModelInfoAsync(model: "");
}
}
}
diff --git a/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_EmbeddingsClient.cs b/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_EmbeddingsClient.cs
index a6f43d432014..81aa527263b2 100644
--- a/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_EmbeddingsClient.cs
+++ b/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_EmbeddingsClient.cs
@@ -17,13 +17,13 @@ public partial class Samples_EmbeddingsClient
{
[Test]
[Ignore("Only validating compilation of examples")]
- public void Example_Client2_GetModelInfo_MaximumSetModelInformation()
+ public void Example_Client2_GetModelInfo_ShortVersion()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
- Response response = client.GetModelInfo(null);
+ Response response = client.GetModelInfo(null, null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -33,13 +33,13 @@ public void Example_Client2_GetModelInfo_MaximumSetModelInformation()
[Test]
[Ignore("Only validating compilation of examples")]
- public async Task Example_Client2_GetModelInfo_MaximumSetModelInformation_Async()
+ public async Task Example_Client2_GetModelInfo_ShortVersion_Async()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
- Response response = await client.GetModelInfoAsync(null);
+ Response response = await client.GetModelInfoAsync(null, null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -49,7 +49,7 @@ public async Task Example_Client2_GetModelInfo_MaximumSetModelInformation_Async(
[Test]
[Ignore("Only validating compilation of examples")]
- public void Example_Client2_GetModelInfo_MaximumSetModelInformation_Convenience()
+ public void Example_Client2_GetModelInfo_ShortVersion_Convenience()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
@@ -60,7 +60,7 @@ public void Example_Client2_GetModelInfo_MaximumSetModelInformation_Convenience(
[Test]
[Ignore("Only validating compilation of examples")]
- public async Task Example_Client2_GetModelInfo_MaximumSetModelInformation_Convenience_Async()
+ public async Task Example_Client2_GetModelInfo_ShortVersion_Convenience_Async()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
@@ -71,13 +71,13 @@ public async Task Example_Client2_GetModelInfo_MaximumSetModelInformation_Conven
[Test]
[Ignore("Only validating compilation of examples")]
- public void Example_Client2_GetModelInfo_MinimumSetModelInformation()
+ public void Example_Client2_GetModelInfo_AllParameters()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
- Response response = client.GetModelInfo(null);
+ Response response = client.GetModelInfo("", null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -87,13 +87,13 @@ public void Example_Client2_GetModelInfo_MinimumSetModelInformation()
[Test]
[Ignore("Only validating compilation of examples")]
- public async Task Example_Client2_GetModelInfo_MinimumSetModelInformation_Async()
+ public async Task Example_Client2_GetModelInfo_AllParameters_Async()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
- Response response = await client.GetModelInfoAsync(null);
+ Response response = await client.GetModelInfoAsync("", null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -103,24 +103,24 @@ public async Task Example_Client2_GetModelInfo_MinimumSetModelInformation_Async(
[Test]
[Ignore("Only validating compilation of examples")]
- public void Example_Client2_GetModelInfo_MinimumSetModelInformation_Convenience()
+ public void Example_Client2_GetModelInfo_AllParameters_Convenience()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
- Response<ModelInfo> response = client.GetModelInfo();
+ Response<ModelInfo> response = client.GetModelInfo(model: "");
}
[Test]
[Ignore("Only validating compilation of examples")]
- public async Task Example_Client2_GetModelInfo_MinimumSetModelInformation_Convenience_Async()
+ public async Task Example_Client2_GetModelInfo_AllParameters_Convenience_Async()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
- Response<ModelInfo> response = await client.GetModelInfoAsync();
+ Response<ModelInfo> response = await client.GetModelInfoAsync(model: "");
}
}
}
diff --git a/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_ImageEmbeddingsClient.cs b/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_ImageEmbeddingsClient.cs
index 7c068f00b89c..d89f6497306d 100644
--- a/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_ImageEmbeddingsClient.cs
+++ b/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_ImageEmbeddingsClient.cs
@@ -17,13 +17,13 @@ public partial class Samples_ImageEmbeddingsClient
{
[Test]
[Ignore("Only validating compilation of examples")]
- public void Example_Client3_GetModelInfo_MaximumSetModelInformation()
+ public void Example_Client3_GetModelInfo_ShortVersion()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);
- Response response = client.GetModelInfo(null);
+ Response response = client.GetModelInfo(null, null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -33,13 +33,13 @@ public void Example_Client3_GetModelInfo_MaximumSetModelInformation()
[Test]
[Ignore("Only validating compilation of examples")]
- public async Task Example_Client3_GetModelInfo_MaximumSetModelInformation_Async()
+ public async Task Example_Client3_GetModelInfo_ShortVersion_Async()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);
- Response response = await client.GetModelInfoAsync(null);
+ Response response = await client.GetModelInfoAsync(null, null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -49,7 +49,7 @@ public async Task Example_Client3_GetModelInfo_MaximumSetModelInformation_Async(
[Test]
[Ignore("Only validating compilation of examples")]
- public void Example_Client3_GetModelInfo_MaximumSetModelInformation_Convenience()
+ public void Example_Client3_GetModelInfo_ShortVersion_Convenience()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
@@ -60,7 +60,7 @@ public void Example_Client3_GetModelInfo_MaximumSetModelInformation_Convenience(
[Test]
[Ignore("Only validating compilation of examples")]
- public async Task Example_Client3_GetModelInfo_MaximumSetModelInformation_Convenience_Async()
+ public async Task Example_Client3_GetModelInfo_ShortVersion_Convenience_Async()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
@@ -71,13 +71,13 @@ public async Task Example_Client3_GetModelInfo_MaximumSetModelInformation_Conven
[Test]
[Ignore("Only validating compilation of examples")]
- public void Example_Client3_GetModelInfo_MinimumSetModelInformation()
+ public void Example_Client3_GetModelInfo_AllParameters()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);
- Response response = client.GetModelInfo(null);
+ Response response = client.GetModelInfo("", null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -87,13 +87,13 @@ public void Example_Client3_GetModelInfo_MinimumSetModelInformation()
[Test]
[Ignore("Only validating compilation of examples")]
- public async Task Example_Client3_GetModelInfo_MinimumSetModelInformation_Async()
+ public async Task Example_Client3_GetModelInfo_AllParameters_Async()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);
- Response response = await client.GetModelInfoAsync(null);
+ Response response = await client.GetModelInfoAsync("", null);
JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
Console.WriteLine(result.GetProperty("model_name").ToString());
@@ -103,24 +103,24 @@ public async Task Example_Client3_GetModelInfo_MinimumSetModelInformation_Async(
[Test]
[Ignore("Only validating compilation of examples")]
- public void Example_Client3_GetModelInfo_MinimumSetModelInformation_Convenience()
+ public void Example_Client3_GetModelInfo_AllParameters_Convenience()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);
- Response<ModelInfo> response = client.GetModelInfo();
+ Response<ModelInfo> response = client.GetModelInfo(model: "");
}
[Test]
[Ignore("Only validating compilation of examples")]
- public async Task Example_Client3_GetModelInfo_MinimumSetModelInformation_Convenience_Async()
+ public async Task Example_Client3_GetModelInfo_AllParameters_Convenience_Async()
{
Uri endpoint = new Uri("");
AzureKeyCredential credential = new AzureKeyCredential("");
ImageEmbeddingsClient client = new ImageEmbeddingsClient(endpoint, credential);
- Response<ModelInfo> response = await client.GetModelInfoAsync();
+ Response<ModelInfo> response = await client.GetModelInfoAsync(model: "");
}
}
}
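
All three sample files pick up the same regenerated shape: the protocol overloads gain an optional model argument ahead of the RequestContext slot (hence GetModelInfo(null, null)), while the convenience overloads expose model as a named optional parameter. A sketch of the convenience path with placeholder endpoint, key, and model name (all invented):

    using System;
    using Azure;
    using Azure.AI.Inference;

    ChatCompletionsClient client = new ChatCompletionsClient(
        new Uri("https://example.invalid"), new AzureKeyCredential("placeholder-key"));
    // model selects which deployment's info to return (behavior assumed from the parameter name).
    Response<ModelInfo> info = client.GetModelInfo(model: "my-deployment");
    Console.WriteLine(info.Value.ModelName);
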
diff --git a/sdk/ai/Azure.AI.Inference/tsp-location.yaml b/sdk/ai/Azure.AI.Inference/tsp-location.yaml
index 84d0451aecc0..0cc0fb9d1f8d 100644
--- a/sdk/ai/Azure.AI.Inference/tsp-location.yaml
+++ b/sdk/ai/Azure.AI.Inference/tsp-location.yaml
@@ -1,3 +1,4 @@
directory: specification/ai/ModelClient
+commit: caee61180367b726db470989a36ee57837825a37
repo: Azure/azure-rest-api-specs
-commit: 9524584541371b1fc89720e9325332f52f850e70
+additionalDirectories:
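
For reference, the tsp-location.yaml this hunk produces (the commit key moves above repo, and an empty additionalDirectories key is appended):

    directory: specification/ai/ModelClient
    commit: caee61180367b726db470989a36ee57837825a37
    repo: Azure/azure-rest-api-specs
    additionalDirectories: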