Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 43 additions & 21 deletions OpenAI/Assets/SampleMenu.unity
Original file line number Diff line number Diff line change
Expand Up @@ -1691,34 +1691,14 @@ GameObject:
m_Component:
- component: {fileID: 1886889380}
- component: {fileID: 1886889379}
- component: {fileID: 1886889378}
- component: {fileID: 1886889381}
m_Layer: 0
m_Name: EventSystem
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!114 &1886889378
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1886889377}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 4f231c4fb786f3946a6b90b886c48677, type: 3}
m_Name:
m_EditorClassIdentifier:
m_SendPointerHoverToParent: 1
m_HorizontalAxis: Horizontal
m_VerticalAxis: Vertical
m_SubmitButton: Submit
m_CancelButton: Cancel
m_InputActionsPerSecond: 10
m_RepeatDelay: 0.5
m_ForceModuleActive: 0
--- !u!114 &1886889379
MonoBehaviour:
m_ObjectHideFlags: 0
Expand Down Expand Up @@ -1749,3 +1729,45 @@ Transform:
m_Father: {fileID: 0}
m_RootOrder: 3
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &1886889381
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1886889377}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 01614664b831546d2ae94a42149d80ac, type: 3}
m_Name:
m_EditorClassIdentifier:
m_SendPointerHoverToParent: 1
m_MoveRepeatDelay: 0.5
m_MoveRepeatRate: 0.1
m_XRTrackingOrigin: {fileID: 0}
m_ActionsAsset: {fileID: -944628639613478452, guid: ca9f5fa95ffab41fb9a615ab714db018,
type: 3}
m_PointAction: {fileID: -1654692200621890270, guid: ca9f5fa95ffab41fb9a615ab714db018,
type: 3}
m_MoveAction: {fileID: -8784545083839296357, guid: ca9f5fa95ffab41fb9a615ab714db018,
type: 3}
m_SubmitAction: {fileID: 392368643174621059, guid: ca9f5fa95ffab41fb9a615ab714db018,
type: 3}
m_CancelAction: {fileID: 7727032971491509709, guid: ca9f5fa95ffab41fb9a615ab714db018,
type: 3}
m_LeftClickAction: {fileID: 3001919216989983466, guid: ca9f5fa95ffab41fb9a615ab714db018,
type: 3}
m_MiddleClickAction: {fileID: -2185481485913320682, guid: ca9f5fa95ffab41fb9a615ab714db018,
type: 3}
m_RightClickAction: {fileID: -4090225696740746782, guid: ca9f5fa95ffab41fb9a615ab714db018,
type: 3}
m_ScrollWheelAction: {fileID: 6240969308177333660, guid: ca9f5fa95ffab41fb9a615ab714db018,
type: 3}
m_TrackedDevicePositionAction: {fileID: 6564999863303420839, guid: ca9f5fa95ffab41fb9a615ab714db018,
type: 3}
m_TrackedDeviceOrientationAction: {fileID: 7970375526676320489, guid: ca9f5fa95ffab41fb9a615ab714db018,
type: 3}
m_DeselectOnBackgroundClick: 1
m_PointerBehavior: 0
m_CursorLockBehavior: 0
m_ScrollDeltaPerTick: 6
46 changes: 37 additions & 9 deletions OpenAI/Packages/com.openai.unity/Runtime/Audio/AudioEndpoint.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,30 @@ internal AudioEndpoint(OpenAIClient client) : base(client) { }

private static readonly object mutex = new();

[Obsolete("use GetSpeechAsync")]
[Obsolete("use GetSpeechAsync with Func<SpeechClip, Task> overload")]
public async Task<Tuple<string, AudioClip>> CreateSpeechAsync(SpeechRequest request, CancellationToken cancellationToken = default)
{
    // Non-streaming wrapper: forwards to CreateSpeechStreamAsync without a partial clip callback.
    var speechResult = await CreateSpeechStreamAsync(request, null, cancellationToken);
    return speechResult;
}

[Obsolete("use GetSpeechAsync")]
[Obsolete("use GetSpeechAsync with Func<SpeechClip, Task> overload")]
public async Task<Tuple<string, AudioClip>> CreateSpeechStreamAsync(SpeechRequest request, Action<AudioClip> partialClipCallback, CancellationToken cancellationToken = default)
{
    // The SpeechClip is disposed when this method exits; its cache path and AudioClip
    // are read into the returned tuple before that happens.
    using var result = await GetSpeechAsync(request, speechClip =>
    {
        // CreateSpeechAsync forwards a null callback, so the invocation must be null-safe.
        partialClipCallback?.Invoke(speechClip.AudioClip);
    }, cancellationToken);
    return Tuple.Create(result.CachePath, result.AudioClip);
}

/// <summary>
/// Generates audio from the input text, adapting a synchronous partial clip callback
/// to the <see cref="Func{T, TResult}"/> based overload.
/// </summary>
/// <param name="request"><see cref="SpeechRequest"/>.</param>
/// <param name="partialClipCallback">Callback that receives each partial <see cref="SpeechClip"/>; may be null.</param>
/// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
/// <returns><see cref="SpeechClip"/></returns>
[Obsolete("use GetSpeechAsync with Func<SpeechClip, Task> overload")]
public async Task<SpeechClip> GetSpeechAsync(SpeechRequest request, Action<SpeechClip> partialClipCallback, CancellationToken cancellationToken = default)
{
    // Wrap the synchronous callback in a Task-returning delegate so the call below
    // resolves to the Func<SpeechClip, Task> overload.
    Func<SpeechClip, Task> asyncCallback = partialClip =>
    {
        partialClipCallback?.Invoke(partialClip);
        return Task.CompletedTask;
    };

    var speechClip = await GetSpeechAsync(request, asyncCallback, cancellationToken);
    return speechClip;
}

/// <summary>
/// Generates audio from the input text.
/// </summary>
Expand All @@ -49,7 +59,7 @@ public async Task<Tuple<string, AudioClip>> CreateSpeechStreamAsync(SpeechReques
/// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
/// <returns><see cref="SpeechClip"/></returns>
[Function("Generates audio from the input text.")]
public async Task<SpeechClip> GetSpeechAsync(SpeechRequest request, Action<SpeechClip> partialClipCallback = null, CancellationToken cancellationToken = default)
public async Task<SpeechClip> GetSpeechAsync(SpeechRequest request, Func<SpeechClip, Task> partialClipCallback = null, CancellationToken cancellationToken = default)
{
if (partialClipCallback != null && request.ResponseFormat != SpeechResponseFormat.PCM)
{
Expand Down Expand Up @@ -86,21 +96,39 @@ public async Task<SpeechClip> GetSpeechAsync(SpeechRequest request, Action<Speec
case SpeechResponseFormat.PCM:
{
var part = 0;
var pcmResponse = await Rest.PostAsync(GetUrl("/speech"), payload, partialResponse =>
var pcmResponse = await Rest.PostAsync(GetUrl("/speech"), payload, async partialResponse =>
{
partialClipCallback?.Invoke(new SpeechClip($"{clipName}_{++part}", null, partialResponse.Data));
}, 8192, new RestParameters(client.DefaultRequestHeaders), cancellationToken);
if (partialClipCallback != null && partialResponse.Data.Length > 0)
{
var partialClip = new SpeechClip($"{clipName}_{++part}", null, partialResponse.Data);

try
{
await partialClipCallback(partialClip).ConfigureAwait(false);
}
finally
{
partialClip.Dispose();
}
}
}, 8192, new RestParameters(client.DefaultRequestHeaders, debug: EnableDebug), cancellationToken);
pcmResponse.Validate(EnableDebug);

if (pcmResponse.Data.Length == 0)
{
throw new Exception("No audio data received!");
}

await File.WriteAllBytesAsync(cachedPath, pcmResponse.Data, cancellationToken).ConfigureAwait(true);
return new SpeechClip(clipName, cachedPath, new ReadOnlyMemory<byte>(pcmResponse.Data));
return new SpeechClip(clipName, cachedPath, pcmResponse.Data);
}
default:
{
var audioResponse = await Rest.PostAsync(GetUrl("/speech"), payload, new RestParameters(client.DefaultRequestHeaders), cancellationToken);
audioResponse.Validate(EnableDebug);
await File.WriteAllBytesAsync(cachedPath, audioResponse.Data, cancellationToken).ConfigureAwait(true);
var audioType = request.ResponseFormat == SpeechResponseFormat.MP3 ? AudioType.MPEG : AudioType.WAV;
var finalClip = await Rest.DownloadAudioClipAsync(cachedPath, audioType, fileName: clipName, cancellationToken: cancellationToken);
var finalClip = await Rest.DownloadAudioClipAsync(cachedPath, audioType, fileName: clipName, parameters: new RestParameters(debug: EnableDebug), cancellationToken: cancellationToken);
return new SpeechClip(clipName, cachedPath, finalClip);
}
}
Expand Down
72 changes: 59 additions & 13 deletions OpenAI/Packages/com.openai.unity/Runtime/Audio/SpeechClip.cs
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System;
using Unity.Collections;
using UnityEngine;
using UnityEngine.Scripting;
using Utilities.Audio;

namespace OpenAI.Audio
{
[Preserve]
public sealed class SpeechClip
public sealed class SpeechClip : IDisposable
{
[Preserve]
internal SpeechClip(string name, string cachePath, AudioClip audioClip)
Expand All @@ -17,34 +18,51 @@ internal SpeechClip(string name, string cachePath, AudioClip audioClip)
CachePath = cachePath;
this.audioClip = audioClip;
SampleRate = audioClip.frequency;
var samples = new float[audioClip.samples];
audioClip.GetData(samples, 0);
AudioData = PCMEncoder.Encode(samples);
audioData = audioClip.EncodeToPCM(allocator: Allocator.Persistent);
}

[Preserve]
internal SpeechClip(string name, string cachePath, ReadOnlyMemory<byte> audioData, int sampleRate = 24000)
internal SpeechClip(string name, string cachePath, byte[] audioData, int sampleRate = 24000)
{
Name = name;
CachePath = cachePath;
AudioData = audioData;
this.audioData = new NativeArray<byte>(audioData, Allocator.Persistent);
SampleRate = sampleRate;
}

// Finalizer: runs Dispose() so the native buffers held by this clip are released.
~SpeechClip()
{
    Dispose();
}

/// <summary>
/// The name given to this clip at construction. It is also used as the name of the
/// <see cref="AudioClip"/> that is created on demand from the decoded samples.
/// </summary>
[Preserve]
public string Name { get; }

/// <summary>
/// The cache path supplied at construction. This is the value returned by the
/// implicit conversion of a <see cref="SpeechClip"/> to <see cref="string"/>.
/// </summary>
[Preserve]
public string CachePath { get; }

[Preserve]
public ReadOnlyMemory<byte> AudioData { get; }
// PCM audio bytes backing this clip.
// When no buffer is currently held, an empty persistent NativeArray is allocated,
// stored, and returned on first access.
public NativeArray<byte> AudioData
{
    get
    {
        if (!audioData.HasValue)
        {
            audioData = new NativeArray<byte>(0, Allocator.Persistent);
        }

        return audioData.Value;
    }
}

private NativeArray<byte>? audioData;

[Preserve]
public float[] AudioSamples
=> audioSamples ??= PCMEncoder.Decode(AudioData.ToArray(), inputSampleRate: SampleRate, outputSampleRate: AudioSettings.outputSampleRate);
public NativeArray<float> AudioSamples
{
get
{
if (!audioData.HasValue)
{
return new NativeArray<float>(0, Allocator.Persistent);
}

private float[] audioSamples;
audioSamples ??= PCMEncoder.Decode(
pcmData: AudioData,
inputSampleRate: SampleRate,
outputSampleRate: AudioSettings.outputSampleRate,
allocator: Allocator.Persistent);
return audioSamples.Value;
}
}

private NativeArray<float>? audioSamples;

/// <summary>
/// The sample rate of <see cref="AudioData"/>. It is taken from the source clip's frequency
/// or from the constructor argument, and is used as the input sample rate when decoding samples.
/// </summary>
[Preserve]
public int SampleRate { get; }
Expand All @@ -54,25 +72,53 @@ public AudioClip AudioClip
{
get
{
if (audioClip == null)
if (audioClip == null && (audioSamples.HasValue || audioData.HasValue))
{
audioClip = AudioClip.Create(Name, AudioSamples.Length, 1, AudioSettings.outputSampleRate, false);
#if UNITY_6000_0_OR_NEWER
audioClip.SetData(AudioSamples, 0);
#else
audioClip.SetData(AudioSamples.ToArray(), 0);
#endif
}

return audioClip;
}
}

private AudioClip audioClip;

[Preserve]
public float Length => AudioSamples.Length / (float)AudioSettings.outputSampleRate;
// Duration of the clip in seconds.
public float Length
{
    get
    {
        // Prefer the duration reported by an AudioClip that already exists.
        if (audioClip != null)
        {
            return audioClip.length;
        }

        // Without PCM data (for example after Dispose) there is nothing to measure.
        // Only audioData is checked here: audioSamples is decoded lazily from it, so it is
        // legitimately unset on a clip that holds data but has not been sampled yet.
        // Requiring audioSamples as well made this return 0 for freshly created PCM clips.
        if (!audioData.HasValue)
        {
            return 0;
        }

        // AudioSamples are decoded at AudioSettings.outputSampleRate, so divide by that rate.
        return AudioSamples.Length / (float)AudioSettings.outputSampleRate;
    }
}

/// <summary>
/// Converts a <see cref="SpeechClip"/> to its <see cref="AudioClip"/>.
/// A null clip converts to null.
/// </summary>
[Preserve]
public static implicit operator AudioClip(SpeechClip clip)
{
    if (clip is null)
    {
        return null;
    }

    return clip.AudioClip;
}

/// <summary>
/// Converts a <see cref="SpeechClip"/> to its <see cref="CachePath"/>.
/// A null clip converts to null.
/// </summary>
[Preserve]
public static implicit operator string(SpeechClip clip)
{
    if (clip is null)
    {
        return null;
    }

    return clip.CachePath;
}

/// <summary>
/// Releases the native buffers holding the decoded samples and the PCM data.
/// Safe to call more than once: each buffer reference is cleared after it is disposed.
/// </summary>
[Preserve]
public void Dispose()
{
    audioSamples?.Dispose();
    audioSamples = null;
    audioData?.Dispose();
    audioData = null;

    // The finalizer only calls Dispose(), so once the buffers are released
    // there is nothing left for it to do.
    GC.SuppressFinalize(this);
}
}
}
Loading
Loading