Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.DiagnosticIds;
using Microsoft.Shared.Diagnostics;

namespace Microsoft.Extensions.AI;

/// <summary>
/// An optional base class for <see cref="ITextToSpeechClient"/> implementations that forward every call to another client instance.
/// </summary>
/// <remarks>
/// Derive from this type when building clients that are meant to be layered, in any order, around an underlying
/// <see cref="ITextToSpeechClient"/>. Unless a member is overridden, it simply forwards to <see cref="InnerClient"/>.
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AITextToSpeech, UrlFormat = DiagnosticIds.UrlFormat)]
public class DelegatingTextToSpeechClient : ITextToSpeechClient
{
    /// <summary>Initializes a new instance of the <see cref="DelegatingTextToSpeechClient"/> class.</summary>
    /// <param name="innerClient">The client that calls are forwarded to.</param>
    protected DelegatingTextToSpeechClient(ITextToSpeechClient innerClient)
        => InnerClient = Throw.IfNull(innerClient);

    /// <summary>Gets the wrapped <see cref="ITextToSpeechClient" /> that calls are forwarded to.</summary>
    protected ITextToSpeechClient InnerClient { get; }

    /// <inheritdoc />
    public virtual Task<TextToSpeechResponse> GetAudioAsync(
        string text, TextToSpeechOptions? options = null, CancellationToken cancellationToken = default)
        => InnerClient.GetAudioAsync(text, options, cancellationToken);

    /// <inheritdoc />
    public virtual IAsyncEnumerable<TextToSpeechResponseUpdate> GetStreamingAudioAsync(
        string text, TextToSpeechOptions? options = null, CancellationToken cancellationToken = default)
        => InnerClient.GetStreamingAudioAsync(text, options, cancellationToken);

    /// <inheritdoc />
    public virtual object? GetService(Type serviceType, object? serviceKey = null)
    {
        _ = Throw.IfNull(serviceType);

        // Only an unkeyed request can be answered by this instance. The meaning of a
        // non-null key is unknown here, so keyed requests always go to the inner client.
        if (serviceKey is null && serviceType.IsInstanceOfType(this))
        {
            return this;
        }

        return InnerClient.GetService(serviceType, serviceKey);
    }

    /// <inheritdoc />
    public void Dispose()
    {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }

    /// <summary>Releases the resources held by this instance.</summary>
    /// <param name="disposing">
    /// <see langword="true"/> if being called from <see cref="Dispose()"/>; otherwise, <see langword="false"/>.
    /// When <see langword="true"/>, the inner client is disposed as well.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (!disposing)
        {
            return;
        }

        InnerClient.Dispose();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Extensions.AI;

/// <summary>Defines a client that converts text to speech.</summary>
/// <remarks>
/// <para>
/// All members of <see cref="ITextToSpeechClient"/> are thread-safe for concurrent use unless stated otherwise, and every
/// implementation of <see cref="ITextToSpeechClient"/> is expected to support serving multiple requests concurrently.
/// </para>
/// <para>
/// Implementations might nevertheless mutate the arguments passed to <see cref="GetAudioAsync"/> and
/// <see cref="GetStreamingAudioAsync"/>, for example by configuring the options instance. Consumers should therefore either
/// avoid sharing argument instances across concurrent invocations or ensure by construction that no
/// <see cref="ITextToSpeechClient"/> performing such mutation is in use. For example, the ConfigureOptions method can be
/// given a callback that mutates the supplied options argument; that should be avoided when a singleton options instance is used.
/// </para>
/// </remarks>
[Experimental(DiagnosticIds.Experiments.AITextToSpeech, UrlFormat = DiagnosticIds.UrlFormat)]
public interface ITextToSpeechClient : IDisposable
{
    /// <summary>Submits text to the model and returns the synthesized speech audio.</summary>
    /// <param name="text">The text to convert into speech.</param>
    /// <param name="options">Options that configure the text-to-speech request.</param>
    /// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
    /// <returns>The generated speech audio.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <see langword="null"/>.</exception>
    Task<TextToSpeechResponse> GetAudioAsync(string text, TextToSpeechOptions? options = null, CancellationToken cancellationToken = default);

    /// <summary>Submits text to the model and streams the synthesized speech audio back as it is produced.</summary>
    /// <param name="text">The text to convert into speech.</param>
    /// <param name="options">Options that configure the text-to-speech request.</param>
    /// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
    /// <returns>The sequence of speech audio updates that make up the streamed output.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <see langword="null"/>.</exception>
    IAsyncEnumerable<TextToSpeechResponseUpdate> GetStreamingAudioAsync(string text, TextToSpeechOptions? options = null, CancellationToken cancellationToken = default);

    /// <summary>Requests an object of the type given by <paramref name="serviceType"/> from the <see cref="ITextToSpeechClient"/>.</summary>
    /// <param name="serviceType">The type of the requested object.</param>
    /// <param name="serviceKey">An optional key that helps identify the target service.</param>
    /// <returns>The object that was found; otherwise, <see langword="null"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="serviceType"/> is <see langword="null"/>.</exception>
    /// <remarks>
    /// This method exists so that strongly typed services offered by the <see cref="ITextToSpeechClient"/> can be retrieved,
    /// whether that is the client itself or any service it wraps.
    /// </remarks>
    object? GetService(Type serviceType, object? serviceKey = null);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics.CodeAnalysis;
using Microsoft.Shared.DiagnosticIds;
using Microsoft.Shared.Diagnostics;

namespace Microsoft.Extensions.AI;

/// <summary>Extensions for <see cref="ITextToSpeechClient"/>.</summary>
[Experimental(DiagnosticIds.Experiments.AITextToSpeech, UrlFormat = DiagnosticIds.UrlFormat)]
public static class TextToSpeechClientExtensions
{
    /// <summary>Asks the <see cref="ITextToSpeechClient"/> for an object of type <typeparamref name="TService"/>.</summary>
    /// <typeparam name="TService">The type of the object to be retrieved.</typeparam>
    /// <param name="client">The client.</param>
    /// <param name="serviceKey">An optional key that can be used to help identify the target service.</param>
    /// <returns>
    /// The found object; otherwise the default value of <typeparamref name="TService"/>, which is
    /// <see langword="null"/> for reference types and nullable value types.
    /// </returns>
    /// <remarks>
    /// The purpose of this method is to allow for the retrieval of strongly typed services that may be provided by the
    /// <see cref="ITextToSpeechClient"/>, including itself or any services it might be wrapping.
    /// </remarks>
    public static TService? GetService<TService>(this ITextToSpeechClient client, object? serviceKey = null)
    {
        _ = Throw.IfNull(client);

        // Type-test instead of casting. A direct (TService?) cast throws NullReferenceException when
        // TService is a non-nullable value type and no service is found (unboxing null), and throws
        // InvalidCastException if the client hands back an object of an unrelated type. In both
        // situations the documented contract is "not found", so the default value is returned instead.
        return client.GetService(typeof(TService), serviceKey) is TService service ? service : default;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Extensions.AI;

/// <summary>Describes an <see cref="ITextToSpeechClient"/>: its provider, the provider's endpoint, and its default model.</summary>
[Experimental(DiagnosticIds.Experiments.AITextToSpeech, UrlFormat = DiagnosticIds.UrlFormat)]
public class TextToSpeechClientMetadata
{
    /// <summary>Initializes a new instance of the <see cref="TextToSpeechClientMetadata"/> class.</summary>
    /// <param name="providerName">
    /// The name of the text to speech provider, if applicable. Where possible, this should map to the
    /// appropriate name defined in the OpenTelemetry Semantic Conventions for Generative AI systems.
    /// </param>
    /// <param name="providerUri">The URL used to reach the text to speech provider, if applicable.</param>
    /// <param name="defaultModelId">The ID of the model the client uses by default, if applicable.</param>
    public TextToSpeechClientMetadata(string? providerName = null, Uri? providerUri = null, string? defaultModelId = null)
    {
        // Every value is optional and stored as supplied; no validation or normalization is applied.
        ProviderName = providerName;
        ProviderUri = providerUri;
        DefaultModelId = defaultModelId;
    }

    /// <summary>Gets the name of the text to speech provider.</summary>
    /// <remarks>
    /// Where possible, this maps to the appropriate name defined in the
    /// OpenTelemetry Semantic Conventions for Generative AI systems.
    /// </remarks>
    public string? ProviderName { get; }

    /// <summary>Gets the URL used to reach the text to speech provider.</summary>
    public Uri? ProviderUri { get; }

    /// <summary>Gets the ID of the model this text to speech client uses by default.</summary>
    /// <remarks>
    /// The value can be <see langword="null"/> when the model is unknown or when several models may be associated with this instance.
    /// An individual request may override this value via <see cref="TextToSpeechOptions.ModelId"/>.
    /// </remarks>
    public string? DefaultModelId { get; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Diagnostics.CodeAnalysis;
using System.Text.Json.Serialization;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Extensions.AI;

/// <summary>Holds the settings that configure a single text to speech request.</summary>
[Experimental(DiagnosticIds.Experiments.AITextToSpeech, UrlFormat = DiagnosticIds.UrlFormat)]
public class TextToSpeechOptions
{
    /// <summary>Initializes a new instance of the <see cref="TextToSpeechOptions"/> class with every property unset.</summary>
    public TextToSpeechOptions()
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TextToSpeechOptions"/> class by copying the property values of <paramref name="other"/>.
    /// </summary>
    /// <param name="other">The instance to copy from. When <see langword="null"/>, nothing is copied and every property stays unset.</param>
    /// <remarks>
    /// Property values are copied as-is, except <see cref="AdditionalProperties"/>, which is copied through its own <c>Clone()</c> method.
    /// </remarks>
    protected TextToSpeechOptions(TextToSpeechOptions? other)
    {
        if (other is not null)
        {
            // The dictionary is cloned rather than assigned by reference.
            AdditionalProperties = other.AdditionalProperties?.Clone();

            ModelId = other.ModelId;
            VoiceId = other.VoiceId;
            Language = other.Language;
            AudioFormat = other.AudioFormat;
            Speed = other.Speed;
            Pitch = other.Pitch;
            Volume = other.Volume;
            RawRepresentationFactory = other.RawRepresentationFactory;
        }
    }

    /// <summary>Gets or sets the ID of the model to use for the text to speech request.</summary>
    public string? ModelId { get; set; }

    /// <summary>Gets or sets the identifier of the voice to synthesize speech with.</summary>
    public string? VoiceId { get; set; }

    /// <summary>Gets or sets the language of the generated speech.</summary>
    /// <remarks>
    /// This is typically a BCP 47 language tag (e.g., "en-US", "fr-FR").
    /// </remarks>
    public string? Language { get; set; }

    /// <summary>Gets or sets the requested format of the audio output.</summary>
    /// <remarks>
    /// This may be a media type (e.g., "audio/mpeg") or a provider-specific format name (e.g., "mp3", "wav", "opus").
    /// When not specified, the provider's default format is used.
    /// </remarks>
    public string? AudioFormat { get; set; }

    /// <summary>Gets or sets the multiplier applied to the speech speed.</summary>
    /// <remarks>
    /// A value of 1.0 represents normal speed. Values greater than 1.0 increase speed; values less than 1.0 decrease speed.
    /// The valid range is provider-specific.
    /// </remarks>
    public float? Speed { get; set; }

    /// <summary>Gets or sets the multiplier applied to the speech pitch.</summary>
    /// <remarks>
    /// A value of 1.0 represents normal pitch. Values greater than 1.0 increase pitch; values less than 1.0 decrease pitch.
    /// The valid range is provider-specific.
    /// </remarks>
    public float? Pitch { get; set; }

    /// <summary>Gets or sets the volume level of the speech.</summary>
    /// <remarks>
    /// The valid range and interpretation is provider-specific; a common convention is 0.0 (silent) to 1.0 (full volume).
    /// </remarks>
    public float? Volume { get; set; }

    /// <summary>Gets or sets any additional properties associated with the options.</summary>
    public AdditionalPropertiesDictionary? AdditionalProperties { get; set; }

    /// <summary>
    /// Gets or sets a callback that creates the raw, implementation-specific representation of the text to speech options.
    /// </summary>
    /// <remarks>
    /// An underlying <see cref="ITextToSpeechClient" /> implementation may have its own representation of options.
    /// When <see cref="ITextToSpeechClient.GetAudioAsync" /> or <see cref="ITextToSpeechClient.GetStreamingAudioAsync"/>
    /// is invoked with a <see cref="TextToSpeechOptions" />, that implementation may convert the supplied options into
    /// its own representation in order to perform the operation. When a consumer knows which concrete
    /// <see cref="ITextToSpeechClient" /> is in use and how it represents options, this callback may return a new instance of
    /// that implementation-specific options type for the <see cref="ITextToSpeechClient" /> implementation to use instead of
    /// creating one itself. Such implementations may further mutate the returned instance based on other settings on this
    /// <see cref="TextToSpeechOptions" /> instance or on other inputs; it is therefore <b>strongly recommended</b> that the
    /// callback return a new instance on each call rather than a shared one.
    /// This is typically used to set an implementation-specific setting that isn't otherwise exposed from the strongly typed
    /// properties on <see cref="TextToSpeechOptions" />.
    /// </remarks>
    [JsonIgnore]
    public Func<ITextToSpeechClient, object?>? RawRepresentationFactory { get; set; }

    /// <summary>Creates a copy of the current <see cref="TextToSpeechOptions"/> instance.</summary>
    /// <returns>A new <see cref="TextToSpeechOptions"/> built from this instance via the copy constructor.</returns>
    public virtual TextToSpeechOptions Clone()
    {
        return new TextToSpeechOptions(this);
    }
}
Loading
Loading