dotnet · stephentoub · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026
@@ -0,0 +1,77 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Shared.DiagnosticIds;
+using Microsoft.Shared.Diagnostics;
+
+namespace Microsoft.Extensions.AI;
+
+/// <summary>
+/// Serves as an optional base class for <see cref="ITextToSpeechClient"/> implementations that forward calls to another instance.
+/// </summary>
+/// <remarks>
+/// Deriving from this type is recommended for clients meant to be layered, in any order, around an underlying <see cref="ITextToSpeechClient"/>.
+/// By default, calls are passed straight through to the inner client instance.
+/// </remarks>
+[Experimental(DiagnosticIds.Experiments.AITextToSpeech, UrlFormat = DiagnosticIds.UrlFormat)]
+public class DelegatingTextToSpeechClient : ITextToSpeechClient
+{
+    /// <summary>
+    /// Initializes a new instance of the <see cref="DelegatingTextToSpeechClient"/> class.
+    /// </summary>
+    /// <param name="innerClient">The client instance being wrapped, to which calls are forwarded.</param>
+    protected DelegatingTextToSpeechClient(ITextToSpeechClient innerClient) =>
+        InnerClient = Throw.IfNull(innerClient);
+
+    /// <inheritdoc />
+    public void Dispose()
+    {
+        Dispose(disposing: true);
+        GC.SuppressFinalize(this);
+    }
+
+    /// <summary>Gets the wrapped <see cref="ITextToSpeechClient" /> that calls are forwarded to.</summary>
+    protected ITextToSpeechClient InnerClient { get; }
+
+    /// <inheritdoc />
+    public virtual Task<TextToSpeechResponse> GetAudioAsync(
+        string text, TextToSpeechOptions? options = null, CancellationToken cancellationToken = default) =>
+        InnerClient.GetAudioAsync(text, options, cancellationToken);
+
+    /// <inheritdoc />
+    public virtual IAsyncEnumerable<TextToSpeechResponseUpdate> GetStreamingAudioAsync(
+        string text, TextToSpeechOptions? options = null, CancellationToken cancellationToken = default) =>
+        InnerClient.GetStreamingAudioAsync(text, options, cancellationToken);
+
+    /// <inheritdoc />
+    public virtual object? GetService(Type serviceType, object? serviceKey = null)
+    {
+        _ = Throw.IfNull(serviceType);
+
+        // Only an unkeyed request can be answered by this instance; the meaning of a non-null key is unknown here.
+        if (serviceKey is null && serviceType.IsInstanceOfType(this))
+        {
+            return this;
+        }
+
+        return InnerClient.GetService(serviceType, serviceKey);
+    }
+
+    /// <summary>Releases the resources used by this instance, disposing the inner client when <paramref name="disposing"/> is set.</summary>
+    /// <param name="disposing"><see langword="true"/> when invoked from <see cref="Dispose()"/>; otherwise, <see langword="false"/>.</param>
+    protected virtual void Dispose(bool disposing)
+    {
+        if (!disposing)
+        {
+            return;
+        }
+
+        InnerClient.Dispose();
+    }
+}
@@ -0,0 +1,62 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Shared.DiagnosticIds;
+
+namespace Microsoft.Extensions.AI;
+
+/// <summary>Represents a text-to-speech client.</summary>
+/// <remarks>
+/// <para>
+/// Unless otherwise specified, all members of <see cref="ITextToSpeechClient"/> are thread-safe for concurrent use.
+/// All implementations of <see cref="ITextToSpeechClient"/> are expected to support being used by multiple requests concurrently.
+/// </para>
+/// <para>
+/// However, implementations of <see cref="ITextToSpeechClient"/> might mutate the arguments supplied to <see cref="GetAudioAsync"/> and
+/// <see cref="GetStreamingAudioAsync"/>, such as by configuring the options instance. Consumers of the interface should therefore either
+/// avoid sharing instances of these arguments across concurrent invocations or otherwise ensure by construction that no
+/// <see cref="ITextToSpeechClient"/> instances are used that might employ such mutation. For example, the ConfigureOptions method may be
+/// provided with a callback that mutates the supplied options argument; that should be avoided when a singleton options instance is used.
+/// </para>
+/// </remarks>
+[Experimental(DiagnosticIds.Experiments.AITextToSpeech, UrlFormat = DiagnosticIds.UrlFormat)]
+public interface ITextToSpeechClient : IDisposable
+{
+    /// <summary>Sends text content to the model and returns the generated audio speech.</summary>
+    /// <param name="text">The text to synthesize into speech.</param>
+    /// <param name="options">The text-to-speech options used to configure the request, or <see langword="null"/> to supply none.</param>
+    /// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
+    /// <returns>The generated audio speech.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <see langword="null"/>.</exception>
+    Task<TextToSpeechResponse> GetAudioAsync(
+        string text,
+        TextToSpeechOptions? options = null,
+        CancellationToken cancellationToken = default);
+
+    /// <summary>Sends text content to the model and streams back the generated audio speech.</summary>
+    /// <param name="text">The text to synthesize into speech.</param>
+    /// <param name="options">The text-to-speech options used to configure the request, or <see langword="null"/> to supply none.</param>
+    /// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
+    /// <returns>The audio speech updates that make up the streamed output.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <see langword="null"/>.</exception>
+    IAsyncEnumerable<TextToSpeechResponseUpdate> GetStreamingAudioAsync(
+        string text,
+        TextToSpeechOptions? options = null,
+        CancellationToken cancellationToken = default);
+
+    /// <summary>Asks the <see cref="ITextToSpeechClient"/> for an object of the specified type <paramref name="serviceType"/>.</summary>
+    /// <param name="serviceType">The type of object being requested.</param>
+    /// <param name="serviceKey">An optional key that can be used to help identify the target service.</param>
+    /// <returns>The found object; otherwise, <see langword="null"/>.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="serviceType"/> is <see langword="null"/>.</exception>
+    /// <remarks>
+    /// This method exists to allow retrieval of strongly typed services that might be provided by the <see cref="ITextToSpeechClient"/>,
+    /// including the client itself or any services it might be wrapping.
+    /// </remarks>
+    object? GetService(Type serviceType, object? serviceKey = null);
+}
@@ -0,0 +1,29 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics.CodeAnalysis;
+using Microsoft.Shared.DiagnosticIds;
+using Microsoft.Shared.Diagnostics;
+
+namespace Microsoft.Extensions.AI;
+
+/// <summary>Provides extension methods for <see cref="ITextToSpeechClient"/>.</summary>
+[Experimental(DiagnosticIds.Experiments.AITextToSpeech, UrlFormat = DiagnosticIds.UrlFormat)]
+public static class TextToSpeechClientExtensions
+{
+    /// <summary>Asks the <see cref="ITextToSpeechClient"/> for an object of type <typeparamref name="TService"/>.</summary>
+    /// <typeparam name="TService">The type of the object to be retrieved.</typeparam>
+    /// <param name="client">The client.</param>
+    /// <param name="serviceKey">An optional key that can be used to help identify the target service.</param>
+    /// <returns>The found object, or the default value of <typeparamref name="TService"/> (<see langword="null"/> for reference types) when the client does not supply one.</returns>
+    /// <exception cref="System.ArgumentNullException"><paramref name="client"/> is <see langword="null"/>.</exception>
+    /// <remarks>This method allows retrieval of strongly typed services that may be provided by the <see cref="ITextToSpeechClient"/>, including itself or any services it might be wrapping.</remarks>
+    public static TService? GetService<TService>(this ITextToSpeechClient client, object? serviceKey = null)
+    {
+        _ = Throw.IfNull(client);
+
+        // Pattern-match instead of casting: a direct cast would unbox a null result when TService is a value type
+        // (NullReferenceException) and would throw InvalidCastException on a mismatched object; here both yield default.
+        return client.GetService(typeof(TService), serviceKey) is TService service ? service : default;
+    }
+}
@@ -0,0 +1,44 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Diagnostics.CodeAnalysis;
+using Microsoft.Shared.DiagnosticIds;
+
+namespace Microsoft.Extensions.AI;
+
+/// <summary>Carries descriptive metadata about an <see cref="ITextToSpeechClient"/>.</summary>
+[Experimental(DiagnosticIds.Experiments.AITextToSpeech, UrlFormat = DiagnosticIds.UrlFormat)]
+public class TextToSpeechClientMetadata
+{
+    /// <summary>Initializes a new instance of the <see cref="TextToSpeechClientMetadata"/> class.</summary>
+    /// <param name="providerName">
+    /// The name of the text-to-speech provider, if applicable. Where possible, this should correspond to the
+    /// matching name defined in the OpenTelemetry Semantic Conventions for Generative AI systems.
+    /// </param>
+    /// <param name="providerUri">The URL used to access the text-to-speech provider, if applicable.</param>
+    /// <param name="defaultModelId">
+    /// The ID of the text-to-speech model used by default, if applicable.
+    /// </param>
+    public TextToSpeechClientMetadata(string? providerName = null, Uri? providerUri = null, string? defaultModelId = null)
+    {
+        (ProviderName, ProviderUri, DefaultModelId) = (providerName, providerUri, defaultModelId);
+    }
+
+    /// <summary>Gets the name of the text-to-speech provider.</summary>
+    /// <remarks>
+    /// Where possible, this corresponds to the matching name defined in the
+    /// OpenTelemetry Semantic Conventions for Generative AI systems.
+    /// </remarks>
+    public string? ProviderName { get; }
+
+    /// <summary>Gets the URL used to access the text-to-speech provider.</summary>
+    public Uri? ProviderUri { get; }
+
+    /// <summary>Gets the ID of the model this text-to-speech client uses by default.</summary>
+    /// <remarks>
+    /// This value can be null when the name is unknown or when multiple possible models are associated with this instance.
+    /// An individual request may override this value via <see cref="TextToSpeechOptions.ModelId"/>.
+    /// </remarks>
+    public string? DefaultModelId { get; }
+}
@@ -0,0 +1,103 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Text.Json.Serialization;
+using Microsoft.Shared.DiagnosticIds;
+
+namespace Microsoft.Extensions.AI;
+
+/// <summary>Represents the options that configure a text-to-speech request.</summary>
+[Experimental(DiagnosticIds.Experiments.AITextToSpeech, UrlFormat = DiagnosticIds.UrlFormat)]
+public class TextToSpeechOptions
+{
+    /// <summary>Initializes a new instance of the <see cref="TextToSpeechOptions"/> class.</summary>
+    public TextToSpeechOptions()
+    {
+    }
+
+    /// <summary>Initializes a new instance of the <see cref="TextToSpeechOptions"/> class, performing a shallow copy of all properties from <paramref name="other"/>.</summary>
+    /// <param name="other">The instance to copy from; when <see langword="null"/>, no property is assigned.</param>
+    protected TextToSpeechOptions(TextToSpeechOptions? other)
+    {
+        if (other is not null)
+        {
+            ModelId = other.ModelId;
+            VoiceId = other.VoiceId;
+            Language = other.Language;
+            AudioFormat = other.AudioFormat;
+            Speed = other.Speed;
+            Pitch = other.Pitch;
+            Volume = other.Volume;
+            RawRepresentationFactory = other.RawRepresentationFactory;
+            // Clone() is invoked on the dictionary rather than reusing the reference held by the source instance.
+            AdditionalProperties = other.AdditionalProperties?.Clone();
+        }
+    }
+
+    /// <summary>Gets or sets the ID of the model to use for the text-to-speech request.</summary>
+    public string? ModelId { get; set; }
+
+    /// <summary>Gets or sets the identifier of the voice to use for speech synthesis.</summary>
+    public string? VoiceId { get; set; }
+
+    /// <summary>Gets or sets the language of the generated speech.</summary>
+    /// <remarks>
+    /// The value is typically a BCP 47 language tag (e.g., "en-US", "fr-FR").
+    /// </remarks>
+    public string? Language { get; set; }
+
+    /// <summary>Gets or sets the desired format of the audio output.</summary>
+    /// <remarks>
+    /// The value may be a media type (e.g., "audio/mpeg") or a provider-specific format name (e.g., "mp3", "wav", "opus").
+    /// If no value is specified, the provider's default format is used.
+    /// </remarks>
+    public string? AudioFormat { get; set; }
+
+    /// <summary>Gets or sets the multiplier applied to the speech speed.</summary>
+    /// <remarks>
+    /// Normal speed is represented by 1.0. Values above 1.0 speed the speech up; values below 1.0 slow it down.
+    /// The range of valid values is provider-specific.
+    /// </remarks>
+    public float? Speed { get; set; }
+
+    /// <summary>Gets or sets the multiplier applied to the speech pitch.</summary>
+    /// <remarks>
+    /// Normal pitch is represented by 1.0. Values above 1.0 raise the pitch; values below 1.0 lower it.
+    /// The range of valid values is provider-specific.
+    /// </remarks>
+    public float? Pitch { get; set; }
+
+    /// <summary>Gets or sets the volume level of the speech.</summary>
+    /// <remarks>
+    /// Both the valid range and its interpretation are provider-specific; a common convention is 0.0 (silent) to 1.0 (full volume).
+    /// </remarks>
+    public float? Volume { get; set; }
+
+    /// <summary>Gets or sets any additional properties associated with the options.</summary>
+    public AdditionalPropertiesDictionary? AdditionalProperties { get; set; }
+
+    /// <summary>
+    /// Gets or sets a callback that creates the raw representation of the text-to-speech options for an underlying implementation.
+    /// </summary>
+    /// <remarks>
+    /// An underlying <see cref="ITextToSpeechClient" /> implementation may represent options with a type of its own.
+    /// When a <see cref="TextToSpeechOptions" /> is passed to <see cref="ITextToSpeechClient.GetAudioAsync" /> or
+    /// <see cref="ITextToSpeechClient.GetStreamingAudioAsync"/>, that implementation may translate the supplied options into
+    /// its own representation for use while carrying out the operation. A consumer that knows which concrete
+    /// <see cref="ITextToSpeechClient" /> is in use, and how it represents options, can have this callback return a new instance
+    /// of that implementation-specific options type, which the <see cref="ITextToSpeechClient" /> implementation then uses
+    /// rather than creating one itself. Because such implementations may further mutate the returned instance based on
+    /// other settings on this <see cref="TextToSpeechOptions" /> instance or on other inputs, it is <b>strongly recommended</b>
+    /// that the callback return a new instance on every call instead of a shared one.
+    /// The typical use is to set an implementation-specific setting that the strongly typed properties on
+    /// <see cref="TextToSpeechOptions" /> do not otherwise expose.
+    /// </remarks>
+    [JsonIgnore]
+    public Func<ITextToSpeechClient, object?>? RawRepresentationFactory { get; set; }
+
+    /// <summary>Creates a clone of the current <see cref="TextToSpeechOptions"/> instance.</summary>
+    /// <returns>A new <see cref="TextToSpeechOptions"/> instance populated from the current one.</returns>
+    public virtual TextToSpeechOptions Clone() => new TextToSpeechOptions(this);
+}