OpenVoiceSharp/VoiceChatInterface.cs at master · realcoloride/OpenVoiceSharp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
using System;
using OpusDotNet;
using RNNoise.NET;
using WebRtcVadSharp;

namespace OpenVoiceSharp
{
    /// <summary>
    /// Bundles the pieces needed for one voice chat stream: an Opus encoder and decoder,
    /// an RNNoise denoiser and a WebRTC voice activity detector.
    /// The instance owns those components; call <see cref="Dispose"/> when done with it.
    /// </summary>
    public sealed class VoiceChatInterface : IDisposable
    {
        /// <summary>
        /// Opus frame length. (20ms)
        /// </summary>
        public const int FrameLength = 20; // 20 ms, for max compatibility
        /// <summary>
        /// Opus/RNNoise/WebRTC sample rate. (48kHz)
        /// </summary>
        public const int SampleRate = 48000; // shared by opus, RNNoise and the webrtc VAD
        /// <summary>
        /// Default bitrate. (16kbps)
        /// </summary>
        public const int DefaultBitrate = 16000; // 16kbps, decent enough for voice chatting

        // properties

        /// <summary>
        /// Defines the quality of the audio data.
        /// A good quality bitrate for voice chatting could be 16 kbps (16000).
        /// </summary>
        public int Bitrate { get; private set; } = DefaultBitrate;
        /// <summary>
        /// Defines if the audio will be in stereo or not. The encoder is forced to
        /// stereo when this is enabled and to mono otherwise.
        /// </summary>
        public bool Stereo { get; private set; } = false;
        /// <summary>
        /// Defines if noise suppression (RNNoise) is enabled. RNNoise runs on CPU.
        /// Disable if this uses too much CPU on lower spec devices.
        /// </summary>
        public bool EnableNoiseSuppression { get; set; } = true;
        /// <summary>
        /// Defines if opus should favor the audio quality for audio streaming.
        /// Makes packets bigger with less loss but is useless for simple voice chatting and discussion.
        /// </summary>
        public bool FavorAudioStreaming { get; private set; } = false;

        private int ChannelsAmount => Stereo ? 2 : 1;

        // instances (all owned by this object and released in Dispose)
        private readonly OpusEncoder OpusEncoder;
        private readonly OpusDecoder OpusDecoder;
        private readonly Denoiser Denoiser = new();
        private readonly WebRtcVad VoiceActivityDetector = new()
        {
            FrameLength = WebRtcVadSharp.FrameLength.Is20ms,
            SampleRate = WebRtcVadSharp.SampleRate.Is48kHz
        };

        // scratch buffer reused for every denoise pass so no array is allocated per frame
        private readonly float[] FloatSamples;

        // set once Dispose has run; guards against double disposal and use after disposal
        private bool IsDisposed;

        /// <summary>
        /// Returns if voice activity was detected using the WebRTC VAD.
        /// </summary>
        /// <param name="pcmData">The raw pcm frame (in 16 bit PCM)</param>
        /// <returns>If voice activity was detected in the frame.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="pcmData"/> is null.</exception>
        /// <exception cref="ObjectDisposedException">The interface has been disposed.</exception>
        public bool IsSpeaking(byte[] pcmData)
        {
            ThrowIfDisposed();
            if (pcmData is null)
                throw new ArgumentNullException(nameof(pcmData));

            return VoiceActivityDetector.HasSpeech(pcmData);
        }

        /// <summary>
        /// Runs the frame through RNNoise, round-tripping through the float scratch buffer.
        /// The result is converted back into <paramref name="pcmData"/>, so the caller's
        /// buffer is overwritten.
        /// </summary>
        private void ApplyNoiseSuppression(byte[] pcmData)
        {
            // convert to float32
            VoiceUtilities.Convert16BitToFloat(pcmData, FloatSamples);

            // apply noise suppression
            Denoiser.Denoise(FloatSamples);

            // convert back to 16 bit pcm
            VoiceUtilities.ConvertFloatTo16Bit(FloatSamples, pcmData);
        }

        /// <summary>
        /// Encodes and processes audio data. Also handles noise suppression if needed.
        /// When <see cref="EnableNoiseSuppression"/> is on, <paramref name="pcmData"/> is
        /// denoised in place before being encoded.
        /// </summary>
        /// <param name="pcmData">The 16 bit PCM data according to your needs.</param>
        /// <param name="length">The length handed to the Opus encoder together with <paramref name="pcmData"/>.</param>
        /// <returns>The encoded Opus data, along with its length.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="pcmData"/> is null.</exception>
        /// <exception cref="ObjectDisposedException">The interface has been disposed.</exception>
        public (byte[] encodedOpusData, int encodedLength) SubmitAudioData(byte[] pcmData, int length)
        {
            ThrowIfDisposed();
            if (pcmData is null)
                throw new ArgumentNullException(nameof(pcmData));

            // NOTE(review): the denoise pass is not given `length`; it appears to assume
            // pcmData holds one full frame - confirm against VoiceUtilities.
            if (EnableNoiseSuppression)
                ApplyNoiseSuppression(pcmData);

            byte[] encodedOpusData = OpusEncoder.Encode(pcmData, length, out int encodedLength);
            return (encodedOpusData, encodedLength);
        }

        /// <summary>
        /// Decodes the opus packet.
        /// </summary>
        /// <param name="encodedData">The Opus encoded packet</param>
        /// <param name="length">The length of the data</param>
        /// <returns>The decoded PCM data, along with its length.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="encodedData"/> is null.</exception>
        /// <exception cref="ObjectDisposedException">The interface has been disposed.</exception>
        public (byte[] decodedOpusData, int decodedLength) WhenDataReceived(byte[] encodedData, int length)
        {
            ThrowIfDisposed();
            if (encodedData is null)
                throw new ArgumentNullException(nameof(encodedData));

            byte[] decodedOpusData = OpusDecoder.Decode(encodedData, length, out int decodedLength);
            return (decodedOpusData, decodedLength);
        }

        /// <summary>
        /// Creates a brand new OpenVoiceSharp voice chat interface to manage voice chat.
        /// </summary>
        /// <param name="bitrate">Quality of the audio.</param>
        /// <param name="stereo">Handle stereo?</param>
        /// <param name="enableNoiseSuppression">Enable RNNoise basic noise suppression.</param>
        /// <param name="favorAudioStreaming">Favor audio streaming and less compressed packets to favor audio quality.</param>
        /// <param name="vadOperatingMode">The VAD (voice activity detection) operating mode. Left at the detector's default when null.</param>
        public VoiceChatInterface(
            int bitrate = DefaultBitrate,
            bool stereo = false,
            bool enableNoiseSuppression = true,
            bool favorAudioStreaming = false,
            OperatingMode? vadOperatingMode = null
        ) {
            Bitrate = bitrate;
            Stereo = stereo;
            EnableNoiseSuppression = enableNoiseSuppression;
            FavorAudioStreaming = favorAudioStreaming;
            int channels = ChannelsAmount;

            try
            {
                // size the float scratch buffer for noise suppression
                // (presumably GetSampleSize is in bytes and each 16 bit sample takes 2 - TODO confirm)
                FloatSamples = new float[VoiceUtilities.GetSampleSize(channels) / 2];

                // create the encoder first and configure it afterwards: if a setter
                // throws, the instance is already stored in the field and gets
                // disposed by the catch block instead of leaking
                OpusEncoder = new(
                    favorAudioStreaming ? Application.Audio : Application.VoIP,
                    SampleRate,
                    channels
                );
                OpusEncoder.Bitrate = bitrate;
                OpusEncoder.VBR = false;
                OpusEncoder.ForceChannels = stereo ? ForceChannels.Stereo : ForceChannels.Mono;

                OpusDecoder = new(FrameLength, SampleRate, channels);

                if (vadOperatingMode.HasValue)
                    VoiceActivityDetector.OperatingMode = vadOperatingMode.Value;
            }
            catch
            {
                // release whatever was created before the failure, then surface the error
                Dispose();
                throw;
            }
        }

        /// <summary>
        /// Releases the Opus encoder/decoder, the denoiser and the voice activity detector.
        /// Safe to call more than once; the instance must not be used afterwards.
        /// </summary>
        public void Dispose()
        {
            if (IsDisposed)
                return;

            IsDisposed = true;

            // the codec fields can still be unset when the constructor failed half way
            OpusEncoder?.Dispose();
            OpusDecoder?.Dispose();
            Denoiser.Dispose();
            VoiceActivityDetector.Dispose();
        }

        // Throws when a public member is used after Dispose.
        private void ThrowIfDisposed()
        {
            if (IsDisposed)
                throw new ObjectDisposedException(nameof(VoiceChatInterface));
        }
    }
}