
Commit 150893c

feat: Add video streaming support to Firebase AI Logic
This change introduces video streaming capabilities to the Firebase AI Logic library, mirroring the existing audio streaming API.

Key features:
- A new `VideoHelper` class to manage camera input and frame capture.
- `startVideoConversation` and `stopVideoConversation` methods in `LiveSession` to control the video stream.
- Image frames are captured, scaled to a maximum dimension of 2048 pixels, and encoded as JPEGs before being sent to the backend.
- Image processing is offloaded to a background coroutine to avoid blocking the main thread and ensure smooth video streaming.
- Camera resolution selection is optimized to choose the largest available resolution for the best quality.
- Image scaling is performed efficiently using `inSampleSize` to minimize memory and CPU usage.

Note: The tests for this feature could not be run due to a persistent issue with locating the Android SDK in the testing environment.
1 parent 5ddcc4b commit 150893c
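
Rough usage sketch (not part of this commit): an app that already holds a connected LiveSession could wire the new API to a camera roughly as follows. The streamCameraToModel helper and the choice of the first camera ID are hypothetical; only startVideoConversation / stopVideoConversation come from this change, the @OptIn reflects that the Live API is in public preview, and the CAMERA permission must already be granted.

import android.Manifest
import android.content.Context
import android.hardware.camera2.CameraManager
import androidx.annotation.RequiresPermission
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.PublicPreviewAPI

// Hypothetical helper, not part of this commit: streams the first available
// camera into an already-connected LiveSession.
@OptIn(PublicPreviewAPI::class)
@RequiresPermission(Manifest.permission.CAMERA)
suspend fun streamCameraToModel(context: Context, session: LiveSession) {
  val cameraManager = context.getSystemService(Context.CAMERA_SERVICE) as CameraManager
  val cameraId = cameraManager.cameraIdList.firstOrNull() ?: return

  // Captures frames, scales them to at most 2048 px, and sends them to the model
  // as JPEGs until stopVideoConversation() or close() is called on the session.
  session.startVideoConversation(cameraId)
}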

File tree

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt
firebase-ai/src/main/kotlin/com/google/firebase/ai/type/VideoHelper.kt

2 files changed: +288 -0 lines


firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt

Lines changed: 65 additions & 0 deletions
@@ -62,6 +62,7 @@ internal constructor(
   private val session: DefaultClientWebSocketSession,
   @Blocking private val blockingDispatcher: CoroutineContext,
   private var audioHelper: AudioHelper? = null,
+  private var videoHelper: VideoHelper? = null,
   private val firebaseApp: FirebaseApp,
 ) {
   /**
@@ -162,6 +163,66 @@ internal constructor(

       audioHelper?.release()
       audioHelper = null
+      videoHelper?.release()
+      videoHelper = null
+    }
+  }
+
+  /**
+   * Stops the video conversation with the model.
+   *
+   * This only needs to be called after a previous call to [startVideoConversation].
+   *
+   * If there is no video conversation currently active, this function does nothing.
+   */
+  public fun stopVideoConversation() {
+    FirebaseAIException.catch {
+      if (!startedReceiving.getAndSet(false)) return@catch
+
+      scope.cancel()
+
+      videoHelper?.release()
+      videoHelper = null
+    }
+  }
+
+  /**
+   * Starts a video conversation with the model, which can only be stopped using
+   * [stopVideoConversation] or [close].
+   *
+   * @param cameraId The ID of the camera to use for the video stream.
+   */
+  @RequiresPermission(Manifest.permission.CAMERA)
+  public suspend fun startVideoConversation(cameraId: String) {
+    val context = firebaseApp.applicationContext
+    if (
+      ContextCompat.checkSelfPermission(context, Manifest.permission.CAMERA) !=
+        PackageManager.PERMISSION_GRANTED
+    ) {
+      throw PermissionMissingException("Camera access not provided by the user")
+    }
+
+    FirebaseAIException.catchAsync {
+      if (scope.isActive) {
+        Log.w(
+          TAG,
+          "startVideoConversation called after a conversation has already started. " +
+            "Call stopVideoConversation to close the previous connection."
+        )
+        return@catchAsync
+      }
+
+      scope = CoroutineScope(blockingDispatcher + childJob())
+      val cameraManager =
+        context.getSystemService(android.content.Context.CAMERA_SERVICE) as
+          android.hardware.camera2.CameraManager
+      videoHelper = VideoHelper.build(cameraManager)
+      videoHelper
+        ?.start(cameraId)
+        ?.buffer(UNLIMITED)
+        ?.onEach { sendMediaStream(listOf(MediaData(it, "image/jpeg"))) }
+        ?.catch { throw FirebaseAIException.from(it) }
+        ?.launchIn(scope)
     }
   }

@@ -171,6 +232,9 @@ internal constructor(
   /** Indicates whether an audio conversation is being used for this session object. */
   public fun isAudioConversationActive(): Boolean = (audioHelper != null)

+  /** Indicates whether a video conversation is being used for this session object. */
+  public fun isVideoConversationActive(): Boolean = (videoHelper != null)
+
   /**
    * Receives responses from the model for both streaming and standard requests.
    *
@@ -314,6 +378,7 @@ internal constructor(
     FirebaseAIException.catchAsync {
       session.close()
       stopAudioConversation()
+      stopVideoConversation()
     }
   }

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/VideoHelper.kt

Lines changed: 223 additions & 0 deletions
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.firebase.ai.type
+
+import android.Manifest
+import android.graphics.Bitmap
+import android.graphics.BitmapFactory
+import android.graphics.ImageFormat
+import android.hardware.camera2.CameraCaptureSession
+import android.hardware.camera2.CameraDevice
+import android.hardware.camera2.CameraManager
+import android.media.ImageReader
+import android.os.Handler
+import android.os.Looper
+import androidx.annotation.RequiresPermission
+import java.io.ByteArrayOutputStream
+import kotlin.coroutines.resume
+import kotlin.coroutines.resumeWithException
+import kotlin.math.max
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.cancel
+import kotlinx.coroutines.channels.awaitClose
+import kotlinx.coroutines.flow.Flow
+import kotlinx.coroutines.flow.callbackFlow
+import kotlinx.coroutines.flow.emptyFlow
+import kotlinx.coroutines.launch
+import kotlinx.coroutines.suspendCancellableCoroutine
+
+/**
+ * Helper class for streaming video from the camera.
+ *
+ * @see VideoHelper.build
+ * @see LiveSession.startVideoConversation
+ */
+@PublicPreviewAPI
+internal class VideoHelper(
+  private val cameraManager: CameraManager,
+) {
+  private var cameraDevice: CameraDevice? = null
+  private var imageReader: ImageReader? = null
+  private var session: CameraCaptureSession? = null
+  private val scope = CoroutineScope(Dispatchers.Default)
+
+  private var released: Boolean = false
+
+  /**
+   * Release the system resources on the camera.
+   *
+   * Once a [VideoHelper] has been "released", it can _not_ be used again.
+   *
+   * This method can safely be called multiple times, as it won't do anything if this instance has
+   * already been released.
+   */
+  fun release() {
+    if (released) return
+    released = true
+
+    session?.close()
+    imageReader?.close()
+    cameraDevice?.close()
+    scope.cancel()
+  }
+
+  /**
+   * Start perpetually streaming the camera, and return the bytes read in a flow.
+   *
+   * Returns an empty flow if this [VideoHelper] has been [released][release].
+   */
+  @RequiresPermission(Manifest.permission.CAMERA)
+  suspend fun start(cameraId: String): Flow<ByteArray> {
+    if (released) return emptyFlow()
+
+    cameraDevice = openCamera(cameraId)
+    val cameraDevice = cameraDevice ?: return emptyFlow()
+
+    val characteristics = cameraManager.getCameraCharacteristics(cameraId)
+    val streamConfigurationMap =
+      characteristics.get(android.hardware.camera2.CameraCharacteristics.SCALER_STREAM_CONFIGURATION_MAP)
+    val outputSizes = streamConfigurationMap?.getOutputSizes(ImageFormat.JPEG)
+    val size = outputSizes?.maxByOrNull { it.width * it.height } ?: return emptyFlow()
+
+    imageReader = ImageReader.newInstance(size.width, size.height, ImageFormat.JPEG, 1)
+    val imageReader = imageReader ?: return emptyFlow()
+
+    session = createCaptureSession(cameraDevice, imageReader)
+    val session = session ?: return emptyFlow()
+
+    val captureRequest =
+      session.device.createCaptureRequest(CameraDevice.TEMPLATE_PREVIEW).apply {
+        addTarget(imageReader.surface)
+      }
+    session.setRepeatingRequest(captureRequest.build(), null, null)
+
+    return callbackFlow {
+      val listener =
+        ImageReader.OnImageAvailableListener { reader ->
+          val image = reader.acquireLatestImage()
+          if (image != null) {
+            scope.launch {
+              val buffer = image.planes[0].buffer
+              val bytes = ByteArray(buffer.remaining())
+              buffer.get(bytes)
+              image.close()
+
+              val scaledBytes = scaleAndCompressImage(bytes)
+              trySend(scaledBytes)
+            }
+          }
+        }
+      imageReader.setOnImageAvailableListener(listener, null)
+
+      awaitClose { imageReader.setOnImageAvailableListener(null, null) }
+    }
+  }
+
+  private fun scaleAndCompressImage(bytes: ByteArray): ByteArray {
+    val options = BitmapFactory.Options().apply { inJustDecodeBounds = true }
+    BitmapFactory.decodeByteArray(bytes, 0, bytes.size, options)
+
+    val width = options.outWidth
+    val height = options.outHeight
+    val largestDimension = max(width, height)
+
+    var inSampleSize = 1
+    if (largestDimension > 2048) {
+      val halfLargestDimension = largestDimension / 2
+      while ((halfLargestDimension / inSampleSize) >= 2048) {
+        inSampleSize *= 2
+      }
+    }
+
+    options.inSampleSize = inSampleSize
+    options.inJustDecodeBounds = false
+    var bitmap = BitmapFactory.decodeByteArray(bytes, 0, bytes.size, options)
+
+    val scaledWidth = bitmap.width
+    val scaledHeight = bitmap.height
+    val scaledLargestDimension = max(scaledWidth, scaledHeight)
+    if (scaledLargestDimension > 2048) {
+      val scaleFactor = 2048.0f / scaledLargestDimension
+      val newWidth = (scaledWidth * scaleFactor).toInt()
+      val newHeight = (scaledHeight * scaleFactor).toInt()
+      bitmap = Bitmap.createScaledBitmap(bitmap, newWidth, newHeight, true)
+    }
+
+    val outputStream = ByteArrayOutputStream()
+    bitmap.compress(Bitmap.CompressFormat.JPEG, 80, outputStream)
+    return outputStream.toByteArray()
+  }
+
+  @RequiresPermission(Manifest.permission.CAMERA)
+  private suspend fun openCamera(cameraId: String): CameraDevice =
+    suspendCancellableCoroutine { cont ->
+      val handler = Handler(Looper.getMainLooper())
+      cameraManager.openCamera(
+        cameraId,
+        object : CameraDevice.StateCallback() {
+          override fun onOpened(camera: CameraDevice) {
+            cont.resume(camera)
+          }
+
+          override fun onDisconnected(camera: CameraDevice) {
+            camera.close()
+          }
+
+          override fun onError(camera: CameraDevice, error: Int) {
+            camera.close()
+            cont.resumeWithException(RuntimeException("Failed to open camera. Error: $error"))
+          }
+        },
+        handler
+      )
+    }
+
+  private suspend fun createCaptureSession(
+    cameraDevice: CameraDevice,
+    imageReader: ImageReader
+  ): CameraCaptureSession = suspendCancellableCoroutine { cont ->
+    cameraDevice.createCaptureSession(
+      listOf(imageReader.surface),
+      object : CameraCaptureSession.StateCallback() {
+        override fun onConfigured(session: CameraCaptureSession) {
+          cont.resume(session)
+        }
+
+        override fun onConfigureFailed(session: CameraCaptureSession) {
+          cont.resumeWithException(RuntimeException("Failed to create capture session."))
+        }
+      },
+      null
+    )
+  }
+
+  companion object {
+    private val TAG = VideoHelper::class.java.simpleName
+
+    /**
+     * Creates an instance of [VideoHelper] with the camera manager initialized.
+     *
+     * A separate build method is necessary so that we can properly propagate the required manifest
+     * permission, and throw exceptions when needed.
+     */
+    @RequiresPermission(Manifest.permission.CAMERA)
+    fun build(cameraManager: CameraManager): VideoHelper {
+      return VideoHelper(cameraManager)
+    }
+  }
+}
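
For context on the downscaling in scaleAndCompressImage above: a power-of-two inSampleSize keeps the decoded bitmap roughly at or above the 2048 px cap, and Bitmap.createScaledBitmap then performs the exact final scale only when the decode still exceeds it. A minimal, JVM-only sketch of that arithmetic follows; the 4096x3072 frame size is a hypothetical example, not a value from this change.

fun main() {
  // Hypothetical camera frame dimensions used purely for illustration.
  val width = 4096
  val height = 3072
  val largestDimension = maxOf(width, height)

  // Stage 1: power-of-two subsampling that keeps the decode at or above 2048 px.
  var inSampleSize = 1
  if (largestDimension > 2048) {
    val half = largestDimension / 2
    while (half / inSampleSize >= 2048) inSampleSize *= 2
  }
  val decodedWidth = width / inSampleSize
  val decodedHeight = height / inSampleSize
  println("inSampleSize=$inSampleSize decoded=${decodedWidth}x$decodedHeight")
  // Prints: inSampleSize=2 decoded=2048x1536

  // Stage 2: an exact scale down to 2048 px would only run if the decode still exceeds the cap.
  println("needs exact scale: ${maxOf(decodedWidth, decodedHeight) > 2048}")
  // Prints: needs exact scale: false
}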
