Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion firebase-ai/gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.

version=17.4.0
version=99.9.0
latestReleasedVersion=17.3.0
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ package com.google.firebase.ai.common.util
import android.media.AudioRecord
import kotlin.time.Duration.Companion.milliseconds
import kotlinx.coroutines.delay
import kotlinx.coroutines.flow.callbackFlow
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.isActive
import kotlinx.coroutines.yield

/**
Expand All @@ -40,15 +42,13 @@ internal fun AudioRecord.readAsFlow() = flow {

while (true) {
if (recordingState != AudioRecord.RECORDSTATE_RECORDING) {
// TODO(vguthal): Investigate if both yield and delay are required.
delay(10.milliseconds)
yield()
delay(0)
continue
}
val bytesRead = read(buffer, 0, buffer.size)
if (bytesRead > 0) {
emit(buffer.copyOf(bytesRead))
}
yield()
delay(0)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ internal class AudioHelper(
fun build(): AudioHelper {
val playbackTrack =
AudioTrack(
AudioAttributes.Builder().setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION).build(),
AudioAttributes.Builder().setUsage(AudioAttributes.USAGE_MEDIA).setContentType(AudioAttributes.CONTENT_TYPE_SPEECH).build(),
AudioFormat.Builder()
.setSampleRate(24000)
.setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,15 @@ import io.ktor.client.plugins.websocket.DefaultClientWebSocketSession
import io.ktor.websocket.Frame
import io.ktor.websocket.close
import io.ktor.websocket.readBytes
import kotlinx.coroutines.CoroutineName
import java.util.concurrent.ConcurrentLinkedQueue
import java.util.concurrent.atomic.AtomicBoolean
import kotlin.coroutines.CoroutineContext
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.cancel
import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED
import kotlinx.coroutines.delay
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.buffer
import kotlinx.coroutines.flow.catch
Expand All @@ -48,7 +51,6 @@ import kotlinx.coroutines.flow.onCompletion
import kotlinx.coroutines.flow.onEach
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import kotlinx.coroutines.yield
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.Serializable
import kotlinx.serialization.encodeToString
Expand Down Expand Up @@ -119,7 +121,6 @@ internal constructor(
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null,
enableInterruptions: Boolean = false,
) {

val context = firebaseApp.applicationContext
if (
ContextCompat.checkSelfPermission(context, RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED
Expand All @@ -136,8 +137,8 @@ internal constructor(
)
return@catchAsync
}

scope = CoroutineScope(blockingDispatcher + childJob())
// TODO: Consider using THREAD_PRIORITY_AUDIO for playback and recording (but not for network work).
scope = CoroutineScope(blockingDispatcher + childJob() + CoroutineName("LiveSession Scope"))
audioHelper = AudioHelper.build()

recordUserAudio()
Expand Down Expand Up @@ -200,7 +201,7 @@ internal constructor(
)
}
?.let { emit(it.toPublic()) }
yield()
delay(0)
}
}
.onCompletion { stopAudioConversation() }
Expand Down Expand Up @@ -270,6 +271,7 @@ internal constructor(
Json.encodeToString(
BidiGenerateContentRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal()
)
println("Sending $jsonString")
session.send(Frame.Text(jsonString))
}
}
Expand Down Expand Up @@ -324,7 +326,10 @@ internal constructor(
?.listenToRecording()
?.buffer(UNLIMITED)
?.accumulateUntil(MIN_BUFFER_SIZE)
?.onEach { sendMediaStream(listOf(MediaData(it, "audio/pcm"))) }
?.onEach {
sendMediaStream(listOf(MediaData(it, "audio/pcm")))
delay(0)
}
?.catch { throw FirebaseAIException.from(it) }
?.launchIn(scope)
}
Expand Down Expand Up @@ -372,6 +377,7 @@ internal constructor(
if (it.interrupted) {
playBackQueue.clear()
} else {
println("Queuing audio parts from model")
val audioParts = it.content?.parts?.filterIsInstance<InlineDataPart>().orEmpty()
for (part in audioParts) {
playBackQueue.add(part.inlineData)
Expand Down Expand Up @@ -407,14 +413,15 @@ internal constructor(
if (!enableInterruptions) {
audioHelper?.resumeRecording()
}
yield()
delay(0)
} else {
println("Playing audio data")
/**
* Pause recording while the model is speaking: there is no echo cancellation, so leaving
* the microphone on could interrupt the model.
*/
// TODO(b/408223520): Conditionally pause when param is added
if (enableInterruptions != true) {
if (!enableInterruptions) {
audioHelper?.pauseRecording()
}
audioHelper?.playAudio(playbackData)
Expand Down
2 changes: 1 addition & 1 deletion gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ benchmarkMacro = "1.3.4"
browser = "1.3.0"
cardview = "1.0.0"
constraintlayout = "2.1.4"
coroutines = "1.9.0"
coroutines = "1.10.2"
dagger = "2.51" # Don't bump above 2.51 as it causes a bug in AppDistro FeedbackSender JPEG code
datastore = "1.1.7"
dexmaker = "2.28.1"
Expand Down