1+ /*
2+ * Copyright 2024 LiveKit, Inc.
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ * See the License for the specific language governing permissions and
14+ * limitations under the License.
15+ *
16+ * Originally adapted from: https://github.com/dzolnai/ExoVisualizer
17+ *
18+ * MIT License
19+ *
20+ * Copyright (c) 2019 Dániel Zolnai
21+ *
22+ * Permission is hereby granted, free of charge, to any person obtaining a copy
23+ * of this software and associated documentation files (the "Software"), to deal
24+ * in the Software without restriction, including without limitation the rights
25+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
26+ * copies of the Software, and to permit persons to whom the Software is
27+ * furnished to do so, subject to the following conditions:
28+ *
29+ * The above copyright notice and this permission notice shall be included in all
30+ * copies or substantial portions of the Software.
31+ *
32+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38+ * SOFTWARE.
39+ */
40+
41+ package io.livekit.plugin
42+
43+ import android.media.AudioTrack
44+ import com.paramsen.noise.Noise
45+ import java.nio.ByteBuffer
46+ import java.nio.ByteOrder
47+ import java.util.concurrent.TimeUnit
48+ import kotlin.math.max
49+
50+
/**
 * A Fast Fourier Transform analyzer for audio bytes.
 *
 * Use [queueInput] to add audio bytes, and read [fft]
 * to get the most recently analyzed frequencies.
 */
class FFTAudioAnalyzer {

    /** `true` after [configure] has succeeded and until [release] is called. */
    val isActive: Boolean
        get() = noise != null

    // FFT engine; non-null only while the analyzer is configured.
    private var noise: Noise? = null
    private lateinit var inputAudioFormat: AudioFormat

    private var audioTrackBufferSize = 0

    // Scratch buffer holding the first-channel samples extracted from the latest input.
    private var fftBuffer: ByteBuffer = EMPTY_BUFFER

    // Accumulates first-channel bytes across queueInput calls until a full window exists.
    // Invariant between calls: position == srcBufferPosition and limit == capacity.
    private lateinit var srcBuffer: ByteBuffer

    // Number of bytes in srcBuffer that have not been analyzed yet.
    private var srcBufferPosition = 0
    private val tempShortArray = ShortArray(SAMPLE_SIZE)
    private val src = FloatArray(SAMPLE_SIZE)

    /**
     * The output of the most recent FFT run over audio given through [queueInput],
     * or `null` if no complete window of [SAMPLE_SIZE] samples has been analyzed yet.
     *
     * Each analyzed window replaces this with a newly allocated array of
     * `SAMPLE_SIZE + 2` floats filled in by the FFT engine.
     */
    var fft: FloatArray? = null
        private set

    /**
     * Prepares the analyzer for audio in [inputAudioFormat] and makes it active.
     *
     * May be called again to reconfigure: the previous FFT engine is closed and any
     * samples still waiting for analysis are discarded.
     *
     * @throws IllegalStateException if no buffer size can be determined for the format.
     */
    fun configure(inputAudioFormat: AudioFormat) {
        // Do everything that can fail first, so a failure never leaves the analyzer
        // reporting isActive without a usable accumulation buffer.
        val bufferSize = getDefaultBufferSizeInBytes(inputAudioFormat)
        val newSrcBuffer = ByteBuffer.allocate(bufferSize + BUFFER_EXTRA_SIZE)
        val newNoise = Noise.real(SAMPLE_SIZE)

        // Close the engine from a previous configure() call instead of leaking it.
        noise?.close()

        this.inputAudioFormat = inputAudioFormat
        audioTrackBufferSize = bufferSize
        srcBuffer = newSrcBuffer
        // The new buffer is empty, so the pending-byte counter must restart with it.
        srcBufferPosition = 0
        noise = newNoise
    }

    /** Closes the FFT engine and deactivates the analyzer. Safe to call repeatedly. */
    fun release() {
        noise?.close()
        noise = null
    }

    /**
     * Add audio bytes to be processed.
     *
     * Reads 16-bit samples from [inputBuffer] between its position and limit, treating
     * them as interleaved frames of `numberOfChannels` samples, and feeds the first
     * channel of each frame to the FFT. Does nothing while the analyzer is not active.
     *
     * On return the position of [inputBuffer] has been advanced past every complete
     * frame; a trailing partial frame, if any, is left unconsumed.
     */
    fun queueInput(inputBuffer: ByteBuffer) {
        if (!isActive) {
            return
        }
        val frameSize = SHORT_SIZE * inputAudioFormat.numberOfChannels
        var position = inputBuffer.position()
        val frameCount = (inputBuffer.limit() - position) / frameSize
        val singleChannelOutputSize = frameCount * SHORT_SIZE

        // Set up the scratch buffer, growing it only when the current one is too small.
        if (fftBuffer.capacity() < singleChannelOutputSize) {
            fftBuffer =
                ByteBuffer.allocateDirect(singleChannelOutputSize).order(ByteOrder.nativeOrder())
        } else {
            fftBuffer.clear()
        }

        // Copy the first channel of every complete frame. Iterating by frame count keeps
        // reads inside inputBuffer and writes inside the space reserved above.
        repeat(frameCount) {
            fftBuffer.putShort(inputBuffer.getShort(position))
            position += frameSize
        }

        // Mark the frames read above as consumed.
        inputBuffer.position(position)

        // Expose exactly the bytes written in this call to the reader.
        fftBuffer.flip()
        processFFT(fftBuffer)
    }

    /**
     * Appends the remaining bytes of [buffer] to the accumulation buffer and runs the
     * FFT over every complete window that becomes available.
     */
    private fun processFFT(buffer: ByteBuffer) {
        val engine = noise ?: return

        // Feed the input in pieces that fit the free space of srcBuffer, so an input
        // larger than the accumulation buffer cannot overflow it. Draining after each
        // piece always leaves less than one window pending, so free space never hits zero.
        while (buffer.hasRemaining()) {
            val chunkSize = minOf(buffer.remaining(), srcBuffer.remaining())
            val chunk = buffer.duplicate()
            chunk.limit(chunk.position() + chunkSize)
            srcBuffer.put(chunk)
            buffer.position(buffer.position() + chunkSize)
            srcBufferPosition += chunkSize

            drainCompleteWindows(engine)
        }
    }

    /**
     * Runs [engine] over each complete window of [SAMPLE_SIZE] samples at the front of
     * the accumulation buffer, removes the analyzed bytes, and publishes the result
     * of the last window through [fft].
     */
    private fun drainCompleteWindows(engine: Noise) {
        // PCM 16 bit: every sample occupies two bytes.
        val windowBytes = SAMPLE_SIZE * SHORT_SIZE
        while (srcBufferPosition >= windowBytes) {
            // Read one window of samples from the start of the buffer.
            // NOTE(review): fftBuffer is written in native byte order while srcBuffer keeps
            // the default order of ByteBuffer.allocate; whether the samples decode correctly
            // here depends on the byte order of the caller's input buffer. TODO confirm.
            srcBuffer.position(0)
            srcBuffer.asShortBuffer().get(tempShortArray, 0, SAMPLE_SIZE)
            for (index in tempShortArray.indices) {
                // Normalize to a value between -1.0 and 1.0.
                src[index] = tempShortArray[index].toFloat() / Short.MAX_VALUE
            }

            // Drop the analyzed window and move the still-pending bytes to the front.
            // Limiting to the pending bytes makes compact() copy only those, and leaves
            // position == remaining pending bytes and limit == capacity.
            srcBuffer.limit(srcBufferPosition)
            srcBuffer.position(windowBytes)
            srcBuffer.compact()
            srcBufferPosition -= windowBytes

            fft = engine.fft(src, FloatArray(SAMPLE_SIZE + 2))
        }
    }

    /** Converts [durationUs] microseconds to a number of frames at [sampleRate] Hz. */
    private fun durationUsToFrames(sampleRate: Int, durationUs: Long): Long {
        return durationUs * sampleRate / TimeUnit.MICROSECONDS.convert(1, TimeUnit.SECONDS)
    }

    /** Returns the size in bytes of one frame of [channelCount] channels. */
    private fun getPcmFrameSize(channelCount: Int): Int {
        // assumes PCM_16BIT
        return channelCount * SHORT_SIZE
    }

    /**
     * Maps [channelCount] to the matching `android.media.AudioFormat` output channel
     * mask, or `CHANNEL_INVALID` for anything other than mono or stereo.
     */
    private fun getAudioTrackChannelConfig(channelCount: Int): Int {
        return when (channelCount) {
            1 -> android.media.AudioFormat.CHANNEL_OUT_MONO
            2 -> android.media.AudioFormat.CHANNEL_OUT_STEREO
            // ignore other channel counts that aren't used in LiveKit
            else -> android.media.AudioFormat.CHANNEL_INVALID
        }
    }

    /**
     * Computes a buffer size in bytes for [audioFormat]: four times the minimum
     * AudioTrack buffer size, clamped to between [MIN_BUFFER_DURATION_US] and
     * [MAX_BUFFER_DURATION_US] worth of audio (the upper bound is never below the
     * AudioTrack minimum), rounded down to a whole number of frames.
     *
     * @throws IllegalStateException if AudioTrack reports `ERROR_BAD_VALUE` for the format.
     */
    private fun getDefaultBufferSizeInBytes(audioFormat: AudioFormat): Int {
        val outputPcmFrameSize = getPcmFrameSize(audioFormat.numberOfChannels)
        val minBufferSize =
            AudioTrack.getMinBufferSize(
                audioFormat.sampleRate,
                getAudioTrackChannelConfig(audioFormat.numberOfChannels),
                android.media.AudioFormat.ENCODING_PCM_16BIT
            )

        check(minBufferSize != AudioTrack.ERROR_BAD_VALUE) {
            "AudioTrack rejected the audio format: $audioFormat"
        }
        val multipliedBufferSize = minBufferSize * 4
        val minAppBufferSize =
            durationUsToFrames(audioFormat.sampleRate, MIN_BUFFER_DURATION_US).toInt() *
                outputPcmFrameSize
        val maxAppBufferSize = max(
            minBufferSize.toLong(),
            durationUsToFrames(audioFormat.sampleRate, MAX_BUFFER_DURATION_US) * outputPcmFrameSize
        ).toInt()
        val bufferSizeInFrames =
            multipliedBufferSize.coerceIn(minAppBufferSize, maxAppBufferSize) / outputPcmFrameSize
        return bufferSizeInFrames * outputPcmFrameSize
    }

    companion object {
        /** Number of samples in one FFT window. */
        const val SAMPLE_SIZE = 512
        private val EMPTY_BUFFER = ByteBuffer.allocateDirect(0).order(ByteOrder.nativeOrder())

        // Extra bytes allocated in addition to the AudioTrack-derived buffer size.
        private const val BUFFER_EXTRA_SIZE = SAMPLE_SIZE * 8

        // Size of short in bytes.
        private const val SHORT_SIZE = 2

        // Bounds, in microseconds of audio, used when sizing the accumulation buffer.
        private const val MIN_BUFFER_DURATION_US = 30_000L
        private const val MAX_BUFFER_DURATION_US = 500_000L
    }
}
214+
/**
 * Immutable description of an audio stream, as passed to [FFTAudioAnalyzer.configure].
 *
 * @property bitsPerSample bit depth of a single sample.
 * @property sampleRate sample rate of the stream.
 * @property numberOfChannels number of channels per frame.
 */
data class AudioFormat(
    val bitsPerSample: Int,
    val sampleRate: Int,
    val numberOfChannels: Int
)
0 commit comments