3 changes: 2 additions & 1 deletion OpenAI-DotNet/Extensions/RealtimeServerEventConverter.cs
100644 → 100755
@@ -29,7 +29,8 @@ _ when type.StartsWith("conversation.item.input_audio_transcription") => root.De
"input_audio_buffer.committed" => root.Deserialize<InputAudioBufferCommittedResponse>(options),
"input_audio_buffer.cleared" => root.Deserialize<InputAudioBufferClearedResponse>(options),
"input_audio_buffer.speech_started" => root.Deserialize<InputAudioBufferStartedResponse>(options),
"input_audio_buffer.speech_stopped" => root.Deserialize<InputAudioBufferStoppedResponse>(options),
"input_audio_buffer.speech_stopped" => root.Deserialize<InputAudioBufferStoppedResponse>(options),
"output_audio_buffer.started" => root.Deserialize<OutputAudioBufferStartedResponse>(options),
_ when type.StartsWith("response.audio_transcript") => root.Deserialize<ResponseAudioTranscriptResponse>(options),
_ when type.StartsWith("response.audio") => root.Deserialize<ResponseAudioResponse>(),
_ when type.StartsWith("response.content_part") => root.Deserialize<ResponseContentPartResponse>(options),
1 change: 1 addition & 0 deletions OpenAI-DotNet/OpenAI-DotNet.csproj
100644 → 100755
@@ -458,5 +458,6 @@ Version 4.4.0
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="SIPSorcery" Version="8.0.14" />
Member: prob need to remove this before publishing.

Author: It won't be possible to use WebRTC without it.

I do understand if you'd prefer to keep dependencies down, and that was what I was getting at in the previous discussion.

The alternative would be a new separate package under RageAgainstThePixel or SIPSorcery.

Member: Let me dive into the specifics and see if there is a way to sort this out in a way that makes sense and is easy to use.

Member (@StephenHodgson, May 9, 2025): I have an existing production project that uses SIPSorcery and this library; I'll fiddle with it to see what I can come up with while upgrading it from WebSockets to WebRTC.

</ItemGroup>
</Project>
7 changes: 5 additions & 2 deletions OpenAI-DotNet/OpenAIClient.cs
100644 → 100755
@@ -75,7 +75,8 @@ public OpenAIClient(OpenAIAuthentication openAIAuthentication = null, OpenAIClie
AssistantsEndpoint = new AssistantsEndpoint(this);
BatchEndpoint = new BatchEndpoint(this);
VectorStoresEndpoint = new VectorStoresEndpoint(this);
- RealtimeEndpoint = new RealtimeEndpoint(this);
+ RealtimeEndpoint = new RealtimeEndpoint(this);
+ RealtimeEndpointWebRTC = new RealtimeEndpointWebRTC(this);
}

~OpenAIClient() => Dispose(false);
@@ -219,7 +220,9 @@ private void Dispose(bool disposing)
/// </summary>
public VectorStoresEndpoint VectorStoresEndpoint { get; }

- public RealtimeEndpoint RealtimeEndpoint { get; }
+ public RealtimeEndpoint RealtimeEndpoint { get; }
+
+ public RealtimeEndpointWebRTC RealtimeEndpointWebRTC { get; }

#endregion Endpoints

33 changes: 33 additions & 0 deletions OpenAI-DotNet/Realtime/OutputAudioBufferStartedResponse.cs
@@ -0,0 +1,33 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System.Text.Json.Serialization;

namespace OpenAI.Realtime
{
public sealed class OutputAudioBufferStartedResponse : BaseRealtimeEvent, IServerEvent
{
/// <inheritdoc />
[JsonInclude]
[JsonPropertyName("event_id")]
public override string EventId { get; internal set; }

/// <inheritdoc />
[JsonInclude]
[JsonPropertyName("type")]
public override string Type { get; protected set; }

/// <summary>
/// Milliseconds since the session started when speech was detected.
/// </summary>
[JsonInclude]
[JsonPropertyName("audio_start_ms")]
public int AudioStartMs { get; private set; }

/// <summary>
/// The ID of the user message item that will be created when speech stops.
/// </summary>
[JsonInclude]
[JsonPropertyName("item_id")]
public string ItemId { get; private set; }
}
}
200 changes: 200 additions & 0 deletions OpenAI-DotNet/Realtime/RealtimeEndpointWebRTC.cs
@@ -0,0 +1,200 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using OpenAI.Extensions;
using SIPSorcery.Media;
using SIPSorcery.Net;
using SIPSorceryMedia.Abstractions;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace OpenAI.Realtime
{
public sealed class RealtimeEndpointWebRTC : OpenAIBaseEndpoint
{
private const string OPENAI_DATACHANNEL_NAME = "oai-events";

public readonly AudioEncoder AudioEncoder;

public readonly AudioFormat AudioFormat;

internal RealtimeEndpointWebRTC(OpenAIClient client) : base(client) {
AudioEncoder = new AudioEncoder(includeOpus: true);
AudioFormat = AudioEncoder.SupportedFormats.Single(x => x.FormatName == AudioCodecsEnum.OPUS.ToString());
}

protected override string Root => "realtime";

protected override bool? IsWebSocketEndpoint => false;

private RTCPeerConnection rtcPeerConnection;

public event Action<IPEndPoint, SDPMediaTypesEnum, RTPPacket> OnRtpPacketReceived;

public event Action OnPeerConnectionConnected;

public event Action OnPeerConnectionClosedOrFailed;

/// <summary>
/// Creates a new realtime session with the provided <see cref="SessionConfiguration"/> options.
/// </summary>
/// <param name="configuration"><see cref="SessionConfiguration"/>.</param>
/// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
/// <returns><see cref="RealtimeSession"/>.</returns>
public async Task<RealtimeSessionWebRTC> CreateSessionAsync(SessionConfiguration configuration = null, RTCConfiguration rtcConfiguration = null, CancellationToken cancellationToken = default)
{
rtcPeerConnection = await CreatePeerConnection(rtcConfiguration);
var session = new RealtimeSessionWebRTC(rtcPeerConnection, EnableDebug);
var sessionCreatedTcs = new TaskCompletionSource<SessionResponse>();

try
{
session.OnEventReceived += OnEventReceived;
session.OnError += OnError;
var offerSdp = rtcPeerConnection.createOffer();
var answerSdp = await SendSdpAsync(configuration?.Model, offerSdp.sdp);
var setAnswerResult = rtcPeerConnection.setRemoteDescription(
new RTCSessionDescriptionInit { sdp = answerSdp, type = RTCSdpType.answer }
);

if (setAnswerResult != SetDescriptionResultEnum.OK)
{
sessionCreatedTcs.TrySetException(new Exception("WebRTC SDP negotiation failed"));
}

var sessionResponse = await sessionCreatedTcs.Task.WithCancellation(cancellationToken).ConfigureAwait(false);
session.Configuration = sessionResponse.SessionConfiguration;
await session.SendAsync(new UpdateSessionRequest(configuration), cancellationToken: cancellationToken).ConfigureAwait(false);
}
finally
{
session.OnError -= OnError;
session.OnEventReceived -= OnEventReceived;
}

return session;

void OnError(Exception e)
{
sessionCreatedTcs.TrySetException(e); // Try* avoids throwing if the task has already completed.
}

void OnEventReceived(IRealtimeEvent @event)
{
try
{
switch (@event)
{
case RealtimeConversationResponse:
Console.WriteLine("[conversation.created]");
break;
case SessionResponse sessionResponse:
if (sessionResponse.Type == "session.created")
{
sessionCreatedTcs.TrySetResult(sessionResponse);
}
break;
case RealtimeEventError realtimeEventError:
sessionCreatedTcs.TrySetException(new Exception(realtimeEventError.Error.Message));
break;
}
}
catch (Exception e)
{
Console.WriteLine(e);
sessionCreatedTcs.TrySetException(e);
}
}
}

private async Task<RTCPeerConnection> CreatePeerConnection(RTCConfiguration pcConfig)
{
var peerConnection = new RTCPeerConnection(pcConfig);
MediaStreamTrack audioTrack = new MediaStreamTrack(AudioFormat, MediaStreamStatusEnum.SendRecv);
peerConnection.addTrack(audioTrack);

var dataChannel = await peerConnection.createDataChannel(OPENAI_DATACHANNEL_NAME);

if (EnableDebug)
{
peerConnection.onconnectionstatechange += state => Console.WriteLine($"Peer connection connected changed to {state}.");
peerConnection.OnTimeout += mediaType => Console.WriteLine($"Timeout on media {mediaType}.");
peerConnection.oniceconnectionstatechange += state => Console.WriteLine($"ICE connection state changed to {state}.");

peerConnection.onsignalingstatechange += () =>
{
if (peerConnection.signalingState == RTCSignalingState.have_local_offer)
{
Console.WriteLine($"Local SDP:\n{peerConnection.localDescription.sdp}");
}
else if (peerConnection.signalingState is RTCSignalingState.have_remote_offer or RTCSignalingState.stable)
{
Console.WriteLine($"Remote SDP:\n{peerConnection.remoteDescription?.sdp}");
}
};
}

peerConnection.OnRtpPacketReceived += (ep, mt, rtp) => OnRtpPacketReceived?.Invoke(ep, mt, rtp);

peerConnection.onconnectionstatechange += (state) =>
{
if (state is RTCPeerConnectionState.closed or
RTCPeerConnectionState.failed or
RTCPeerConnectionState.disconnected)
{
OnPeerConnectionClosedOrFailed?.Invoke();
}
};

dataChannel.onopen += () => OnPeerConnectionConnected?.Invoke();

dataChannel.onclose += () => OnPeerConnectionClosedOrFailed?.Invoke();

return peerConnection;
}

public void SendAudio(uint durationRtpUnits, byte[] sample)
{
if(rtcPeerConnection != null && rtcPeerConnection.connectionState == RTCPeerConnectionState.connected)
{
rtcPeerConnection.SendAudio(durationRtpUnits, sample);
}
}

public async Task<string> SendSdpAsync(string model, string offerSdp, CancellationToken cancellationToken = default)
{
model = string.IsNullOrWhiteSpace(model) ? Models.Model.GPT4oRealtime : model;
var queryParameters = new Dictionary<string, string>();

if (client.OpenAIClientSettings.IsAzureOpenAI)
{
queryParameters["deployment"] = model;
}
else
{
queryParameters["model"] = model;
}

var content = new StringContent(offerSdp, Encoding.UTF8);
content.Headers.ContentType = new MediaTypeHeaderValue("application/sdp");

var url = GetUrl(queryParameters: queryParameters);
using var response = await client.Client.PostAsync(url, content, cancellationToken).ConfigureAwait(false);

if (!response.IsSuccessStatusCode)
{
var errorBody = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
throw new Exception($"Error sending SDP offer: {errorBody}");
}

var sdpAnswer = await response.ReadAsStringAsync(EnableDebug, content, cancellationToken).ConfigureAwait(false);
return sdpAnswer;
}
}
}
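For reviewers, here is a rough usage sketch of the endpoint added in this file. It only exercises members visible in this diff (RealtimeEndpointWebRTC.CreateSessionAsync, SendAudio, and the connection events); the SessionConfiguration constructor arguments and the audio capture/decode pipeline are assumptions, not part of this change.

// Rough usage sketch, not part of this PR: assumes a SessionConfiguration constructor that
// takes a model name; everything else only uses members shown in this diff.
using System;
using OpenAI;
using OpenAI.Realtime;

var api = new OpenAIClient();
var webRtcEndpoint = api.RealtimeEndpointWebRTC;

// Connection lifecycle callbacks exposed by the new endpoint.
webRtcEndpoint.OnPeerConnectionConnected += () => Console.WriteLine("oai-events data channel open.");
webRtcEndpoint.OnPeerConnectionClosedOrFailed += () => Console.WriteLine("Peer connection closed or failed.");

// Inbound model audio arrives as RTP packets; decode with the endpoint's AudioEncoder/AudioFormat.
webRtcEndpoint.OnRtpPacketReceived += (remoteEndPoint, mediaType, rtpPacket) =>
{
    // e.g. webRtcEndpoint.AudioEncoder.DecodeAudio(rtpPacket.Payload, webRtcEndpoint.AudioFormat);
};

// Negotiates the SDP offer/answer and waits for session.created before returning the session.
var session = await webRtcEndpoint.CreateSessionAsync(
    new SessionConfiguration(model: OpenAI.Models.Model.GPT4oRealtime)); // hypothetical constructor arguments

// Outbound audio, already encoded to the negotiated Opus format, is pushed with:
// webRtcEndpoint.SendAudio(durationRtpUnits, encodedSample);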