Commit 4409dae
Add possibility to share screen audio when screensharing is enabled (#1570)
* Capture screen audio when sharing the screen and mix it with audio from the microphone
* api dump
* 1. Do not disable and unpublish the audio track if the user has muted but screen-share audio is present. 2. If the user was muted while screen sharing, stopping screen sharing properly unpublishes the audio stream and sets the mute state.
* Create and publish the audioTrack if it is not present when starting screen share with audio
* Add enableAudio flag while starting screen share
* Fixed MicrophoneManagerTest
* Revert changes to show the screen-share option only on tablets
* No need to check for screen share, as we are not unpublishing local audio while screen sharing
* If the local peer's mic is muted, the screen-share audio will also not be transmitted
* Reformatting the audio mixing code

---------

Co-authored-by: Aleksandar Apostolov <[email protected]>
1 parent 1fef2b8 commit 4409dae

File tree

8 files changed: +353 / -28 lines

demo-app/src/main/kotlin/io/getstream/video/android/ui/call/CallScreen.kt

Lines changed: 1 addition & 1 deletion
@@ -291,7 +291,7 @@ fun CallScreen(
         contract = ActivityResultContracts.StartActivityForResult(),
         onResult = {
             if (it.resultCode == Activity.RESULT_OK && it.data != null) {
-                call.startScreenSharing(it.data!!)
+                call.startScreenSharing(it.data!!, includeAudio = true)
             }
         },
     )
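
The demo now requests screen-share audio unconditionally. For orientation, a minimal self-contained sketch of the full flow, from launching the MediaProjection consent dialog to starting the share (the composable name and button UI are illustrative, not part of this commit):

```kotlin
import android.app.Activity
import android.media.projection.MediaProjectionManager
import androidx.activity.compose.rememberLauncherForActivityResult
import androidx.activity.result.contract.ActivityResultContracts
import androidx.compose.material3.Button
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.ui.platform.LocalContext
import io.getstream.video.android.core.Call

// Hypothetical helper: asks the user for screen-capture consent, then starts
// screen sharing with device audio included.
@Composable
fun ShareScreenWithAudioButton(call: Call) {
    val context = LocalContext.current
    val launcher = rememberLauncherForActivityResult(
        contract = ActivityResultContracts.StartActivityForResult(),
    ) { result ->
        if (result.resultCode == Activity.RESULT_OK && result.data != null) {
            // includeAudio = true also captures device playback audio (API 29+).
            call.startScreenSharing(result.data!!, includeAudio = true)
        }
    }
    Button(onClick = {
        val mpm = context.getSystemService(MediaProjectionManager::class.java)
        launcher.launch(mpm.createScreenCaptureIntent())
    }) {
        Text("Share screen with audio")
    }
}
```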

stream-video-android-core/api/stream-video-android-core.api

Lines changed: 5 additions & 3 deletions
@@ -7652,7 +7652,8 @@ public final class io/getstream/video/android/core/Call {
     public final fun startClosedCaptions (Lkotlin/coroutines/Continuation;)Ljava/lang/Object;
     public final fun startHLS (Lkotlin/coroutines/Continuation;)Ljava/lang/Object;
     public final fun startRecording (Lkotlin/coroutines/Continuation;)Ljava/lang/Object;
-    public final fun startScreenSharing (Landroid/content/Intent;)V
+    public final fun startScreenSharing (Landroid/content/Intent;Z)V
+    public static synthetic fun startScreenSharing$default (Lio/getstream/video/android/core/Call;Landroid/content/Intent;ZILjava/lang/Object;)V
     public final fun startTranscription (Lkotlin/coroutines/Continuation;)Ljava/lang/Object;
     public final fun stopClosedCaptions (Lkotlin/coroutines/Continuation;)Ljava/lang/Object;
     public final fun stopHLS (Lkotlin/coroutines/Continuation;)Ljava/lang/Object;
@@ -8359,8 +8360,9 @@ public final class io/getstream/video/android/core/ScreenShareManager {
     public fun <init> (Lio/getstream/video/android/core/MediaManagerImpl;Lorg/webrtc/EglBase$Context;)V
     public final fun disable (Z)V
     public static synthetic fun disable$default (Lio/getstream/video/android/core/ScreenShareManager;ZILjava/lang/Object;)V
-    public final fun enable (Landroid/content/Intent;Z)V
-    public static synthetic fun enable$default (Lio/getstream/video/android/core/ScreenShareManager;Landroid/content/Intent;ZILjava/lang/Object;)V
+    public final fun enable (Landroid/content/Intent;ZZ)V
+    public static synthetic fun enable$default (Lio/getstream/video/android/core/ScreenShareManager;Landroid/content/Intent;ZZILjava/lang/Object;)V
+    public final fun getAudioEnabled ()Lkotlinx/coroutines/flow/StateFlow;
     public final fun getEglBaseContext ()Lorg/webrtc/EglBase$Context;
     public final fun getMediaManager ()Lio/getstream/video/android/core/MediaManagerImpl;
     public final fun getStatus ()Lkotlinx/coroutines/flow/StateFlow;
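
Reading the dump: in JVM descriptors, Z is a boolean parameter, so the new includeAudio flag changes startScreenSharing's signature from (Landroid/content/Intent;)V to (Landroid/content/Intent;Z)V and enable's from (Landroid/content/Intent;Z)V to (Landroid/content/Intent;ZZ)V. The synthetic *$default entries are the bridges Kotlin generates for default parameter values (the extra I is the defaults bitmask), and getAudioEnabled is the accessor for the new audioEnabled StateFlow introduced below.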

stream-video-android-core/src/main/kotlin/io/getstream/video/android/core/Call.kt

Lines changed: 5 additions & 2 deletions
@@ -1174,10 +1174,13 @@ public class Call(
      * MediaProjectionManager.createScreenCaptureIntent().
      * See https://developer.android.com/guide/topics/large-screens/media-projection#recommended_approach
      */
-    fun startScreenSharing(mediaProjectionPermissionResultData: Intent) {
+    fun startScreenSharing(
+        mediaProjectionPermissionResultData: Intent,
+        includeAudio: Boolean = false,
+    ) {
         if (state.ownCapabilities.value.contains(OwnCapability.Screenshare)) {
             session?.setScreenShareTrack()
-            screenShare.enable(mediaProjectionPermissionResultData)
+            screenShare.enable(mediaProjectionPermissionResultData, includeAudio = includeAudio)
         } else {
             logger.w { "Can't start screen sharing - user doesn't have OwnCapability.Screenshare permission" }
         }
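
With the default of includeAudio = false, existing callers keep their behavior. Callers that opt in can observe whether screen-share audio is actually active through the new StateFlow; a hedged sketch (assuming call.screenShare exposes the ScreenShareManager and a coroutine scope is available; the function and logging are illustrative):

```kotlin
import io.getstream.video.android.core.Call
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.launch

// Illustrative observer of the new audioEnabled StateFlow.
fun observeScreenShareAudio(call: Call, scope: CoroutineScope) {
    scope.launch {
        call.screenShare.audioEnabled.collect { enabled ->
            println("Screen-share audio active: $enabled")
        }
    }
}
```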

stream-video-android-core/src/main/kotlin/io/getstream/video/android/core/MediaManager.kt

Lines changed: 146 additions & 1 deletion
@@ -16,17 +16,24 @@

 package io.getstream.video.android.core

+import android.Manifest
 import android.content.ComponentName
 import android.content.Context
 import android.content.Intent
 import android.content.ServiceConnection
+import android.content.pm.PackageManager
 import android.hardware.camera2.CameraCharacteristics
 import android.hardware.camera2.CameraManager
 import android.media.AudioAttributes
+import android.media.AudioFormat
 import android.media.AudioManager
+import android.media.AudioPlaybackCaptureConfiguration
+import android.media.AudioRecord
+import android.media.AudioRecord.READ_BLOCKING
 import android.media.projection.MediaProjection
 import android.os.Build
 import android.os.IBinder
+import androidx.core.app.ActivityCompat
 import androidx.core.content.ContextCompat
 import androidx.core.content.getSystemService
 import com.twilio.audioswitch.AudioDevice
@@ -66,6 +73,7 @@ import org.webrtc.VideoSource
 import org.webrtc.VideoTrack
 import stream.video.sfu.models.AudioBitrateProfile
 import stream.video.sfu.models.VideoDimension
+import java.nio.ByteBuffer
 import java.util.UUID
 import kotlin.coroutines.resumeWithException

@@ -257,6 +265,16 @@ class ScreenShareManager(
         internal val screenShareResolution = VideoDimension(1920, 1080)
         internal val screenShareBitrate = 1_000_000
         internal val screenShareFps = 15
+        private const val INPUT_NUM_OF_CHANNELS = 1 // 1 for mono, 2 for stereo output
+
+        // Requested size of each recorded buffer provided to the client.
+        private const val CALLBACK_BUFFER_SIZE_MS = 10
+
+        // Average number of callbacks per second.
+        private const val BUFFERS_PER_SECOND = 1000 / CALLBACK_BUFFER_SIZE_MS
+
+        // Bits per sample (16-bit PCM).
+        private const val INPUT_BITS_PER_SAMPLE = 16
     }

     private val logger by taggedLogger("Media:ScreenShareManager")
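
These constants size the capture buffer: with 16-bit mono PCM at the 48 kHz sample rate used below, each frame is INPUT_NUM_OF_CHANNELS * (INPUT_BITS_PER_SAMPLE / 8) = 2 bytes, and each 10 ms callback (BUFFERS_PER_SECOND = 100) covers 48000 / 100 = 480 frames, so the buffer holds 960 bytes per callback.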
@@ -266,11 +284,22 @@ class ScreenShareManager(

     public val isEnabled: StateFlow<Boolean> = _status.mapState { it is DeviceStatus.Enabled }

+    private val _audioEnabled = MutableStateFlow<Boolean>(false)
+
+    /** Represents whether screen share audio is enabled. */
+    public val audioEnabled: StateFlow<Boolean> = _audioEnabled
+
     private lateinit var screenCapturerAndroid: ScreenCapturerAndroid
     internal lateinit var surfaceTextureHelper: SurfaceTextureHelper
     private var setupCompleted = false
     private var isScreenSharing = false
     private var mediaProjectionPermissionResultData: Intent? = null
+    private var mediaProjection: MediaProjection? = null
+    private var screenAudioRecord: AudioRecord? = null
+    private val inputSampleRate = 48000 // Standard WebRTC sample rate
+
+    // ByteBuffer for reading screen audio on demand
+    private var screenAudioBuffer: ByteBuffer? = null

     /**
      * The [ServiceConnection.onServiceConnected] is called when our [StreamScreenShareService]
@@ -311,17 +340,26 @@ class ScreenShareManager(
                 0,
             )

+            // Get the MediaProjection from ScreenCapturerAndroid
+            mediaProjection = screenCapturerAndroid.mediaProjection
+
+            // Start screen audio capture only if audio is enabled
+            if (_audioEnabled.value) {
+                startScreenAudioCapture()
+            }
+
             isScreenSharing = true
         }

        override fun onServiceDisconnected(name: ComponentName) {}
     }

-    fun enable(mediaProjectionPermissionResultData: Intent, fromUser: Boolean = true) {
+    fun enable(mediaProjectionPermissionResultData: Intent, fromUser: Boolean = true, includeAudio: Boolean = false) {
         mediaManager.screenShareTrack.setEnabled(true)
         if (fromUser) {
             _status.value = DeviceStatus.Enabled
         }
+        _audioEnabled.value = includeAudio
         setup()
         startScreenShare(mediaProjectionPermissionResultData)
     }
@@ -330,17 +368,124 @@ class ScreenShareManager(
         if (fromUser) {
             _status.value = DeviceStatus.Disabled
         }
+        _audioEnabled.value = false

         if (isScreenSharing) {
             mediaManager.screenShareTrack.setEnabled(false)
             screenCapturerAndroid.stopCapture()
+            stopScreenAudioCapture()
             mediaManager.context.stopService(
                 Intent(mediaManager.context, StreamScreenShareService::class.java),
             )
             isScreenSharing = false
         }
     }

+    /**
+     * Gets the next chunk of screen audio bytes on demand by reading directly from [AudioRecord].
+     * Returns null if screen audio capture is not active.
+     * This method is called from the AudioBufferCallback in StreamPeerConnectionFactory when mixing is needed.
+     *
+     * @param bytesRequested The number of bytes requested.
+     * @return A [ByteBuffer] containing the requested bytes (possibly fewer if not enough data is available), or null if no data.
+     */
+    internal fun getScreenAudioBytes(bytesRequested: Int): ByteBuffer? {
+        val record = screenAudioRecord ?: return null
+
+        if (bytesRequested <= 0) return null
+
+        // Reuse the buffer if it has enough capacity, otherwise grow it
+        val buffer = screenAudioBuffer?.takeIf { it.capacity() >= bytesRequested }
+            ?: ByteBuffer.allocateDirect(bytesRequested).also { screenAudioBuffer = it }
+
+        buffer.clear()
+        buffer.limit(bytesRequested)
+
+        // Read directly from AudioRecord using READ_BLOCKING mode
+        val bytesRead = record.read(buffer, bytesRequested, READ_BLOCKING)
+
+        if (bytesRead > 0) {
+            // Cap the limit at the bytes actually read before handing the buffer to the mixer
+            buffer.limit(bytesRead)
+            return buffer
+        }
+
+        return null
+    }
+
+    /**
+     * Starts capturing screen audio using AudioRecord with AudioPlaybackCaptureConfiguration.
+     */
+    private fun startScreenAudioCapture() {
+        val mediaProj = mediaProjection ?: run {
+            logger.e { "MediaProjection is null, cannot start screen audio capture" }
+            return
+        }
+
+        if (Build.VERSION.SDK_INT < Build.VERSION_CODES.Q) {
+            logger.w { "Screen audio capture requires Android Q (API 29) or higher" }
+            return
+        }
+
+        if (ActivityCompat.checkSelfPermission(
+                mediaManager.context,
+                Manifest.permission.RECORD_AUDIO,
+            ) != PackageManager.PERMISSION_GRANTED
+        ) {
+            logger.w { "RECORD_AUDIO permission not granted, cannot capture screen audio" }
+            return
+        }
+
+        try {
+            // Size the buffer for one callback of 16-bit mono PCM
+            val bytesPerFrame: Int = INPUT_NUM_OF_CHANNELS * (INPUT_BITS_PER_SAMPLE / 8)
+            val capacity = bytesPerFrame * (inputSampleRate / BUFFERS_PER_SECOND)
+
+            // Create the ByteBuffer used for reading audio on demand
+            screenAudioBuffer = ByteBuffer.allocateDirect(capacity)
+
+            val format = AudioFormat.Builder()
+                .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
+                .setSampleRate(inputSampleRate)
+                .setChannelMask(AudioFormat.CHANNEL_IN_MONO)
+                .build()
+
+            val playbackConfig = AudioPlaybackCaptureConfiguration.Builder(mediaProj)
+                .addMatchingUsage(AudioAttributes.USAGE_MEDIA)
+                .build()
+
+            screenAudioRecord = AudioRecord.Builder()
+                .setAudioFormat(format)
+                .setAudioPlaybackCaptureConfig(playbackConfig)
+                .build()
+
+            screenAudioRecord?.startRecording()
+
+            logger.d { "Screen audio capture started" }
+        } catch (e: Exception) {
+            logger.e(e) { "Failed to start screen audio capture" }
+        }
+    }
+
+    /**
+     * Stops capturing screen audio and releases resources.
+     */
+    private fun stopScreenAudioCapture() {
+        try {
+            screenAudioRecord?.stop()
+            screenAudioRecord?.release()
+            screenAudioRecord = null
+            logger.d { "Screen audio capture stopped" }
+        } catch (e: Exception) {
+            logger.e(e) { "Error stopping screen audio capture" }
+        }
+
+        // Note: the MediaProjection is managed by ScreenCapturerAndroid and is stopped
+        // by screenCapturerAndroid.stopCapture(), so we don't stop it here
+        mediaProjection = null
+        screenAudioBuffer = null
+    }
+
     private fun startScreenShare(mediaProjectionPermissionResultData: Intent) {
         mediaManager.scope.launch {
             this@ScreenShareManager.mediaProjectionPermissionResultData =
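
A note on the capture path: AudioPlaybackCaptureConfiguration (API 29+) only surfaces audio from streams whose usage matches the configuration, here USAGE_MEDIA, and only from apps that have not opted out of playback capture (apps targeting API 29+ are capturable by default and can opt out with android:allowAudioPlaybackCapture="false"). Together with the RECORD_AUDIO check above, this is why screen-share audio silently stays off on older devices or without the permission, rather than failing the whole share.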

stream-video-android-core/src/main/kotlin/io/getstream/video/android/core/call/RtcSession.kt

Lines changed: 39 additions & 20 deletions
@@ -314,6 +314,34 @@ public class RtcSession internal constructor(
         return setTrack(sessionId, type, track)
     }

+    /**
+     * Creates and publishes an audio track for transmitting audio.
+     * Used both when the microphone is enabled and when screen sharing starts with a muted microphone.
+     */
+    private suspend fun createAndPublishAudioTrack() {
+        val canUserSendAudio = call.state.ownCapabilities.value.contains(
+            OwnCapability.SendAudio,
+        )
+        if (!canUserSendAudio) {
+            return
+        }
+
+        setMuteState(isEnabled = true, TrackType.TRACK_TYPE_AUDIO)
+        val streamId = buildTrackId(TrackType.TRACK_TYPE_AUDIO)
+        val track = publisher?.publishStream(
+            streamId,
+            TrackType.TRACK_TYPE_AUDIO,
+        )
+
+        setLocalTrack(
+            TrackType.TRACK_TYPE_AUDIO,
+            AudioTrack(
+                streamId = streamId,
+                audio = track as org.webrtc.AudioTrack,
+            ),
+        )
+    }
+
     /**
      * Connection and WebRTC.
      */
@@ -399,6 +427,16 @@ public class RtcSession internal constructor(
                 sampleData = sampleData,
             )
         }
+
+        // Set up the screen audio bytes provider for mixing with microphone audio during screen sharing
+        call.peerConnectionFactory.setScreenAudioBytesProvider { bytesRequested ->
+            call.mediaManager.screenShare.getScreenAudioBytes(bytesRequested)
+        }
+
+        // Set up the microphone enabled provider to check whether microphone audio should be included in the mix
+        call.peerConnectionFactory.setMicrophoneEnabledProvider {
+            call.mediaManager.microphone.isEnabled.value
+        }
     }

     private var participantsMonitoringJob: Job? = null
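
Design-wise, screen audio is not published as a second track: the peer connection factory pulls screen-audio bytes on demand inside its audio-buffer callback and mixes them into the single published audio stream, with the microphone provider deciding whether mic samples join the mix. A consequence, called out in the commit message, is that muting the local microphone also stops the screen-share audio, because the shared audio track is what carries the mixed samples.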
@@ -650,27 +688,8 @@ public class RtcSession internal constructor(

         coroutineScope.launch {
             call.mediaManager.microphone.status.collectLatest {
-                val canUserSendAudio = call.state.ownCapabilities.value.contains(
-                    OwnCapability.SendAudio,
-                )
-
                 if (it == DeviceStatus.Enabled) {
-                    if (canUserSendAudio) {
-                        setMuteState(isEnabled = true, TrackType.TRACK_TYPE_AUDIO)
-                        val streamId = buildTrackId(TrackType.TRACK_TYPE_AUDIO)
-                        val track = publisher?.publishStream(
-                            streamId,
-                            TrackType.TRACK_TYPE_AUDIO,
-                        )
-
-                        setLocalTrack(
-                            TrackType.TRACK_TYPE_AUDIO,
-                            AudioTrack(
-                                streamId = streamId,
-                                audio = track as org.webrtc.AudioTrack,
-                            ),
-                        )
-                    }
+                    createAndPublishAudioTrack()
                 } else {
                     setMuteState(isEnabled = false, TrackType.TRACK_TYPE_AUDIO)
                     publisher?.unpublishStream(TrackType.TRACK_TYPE_AUDIO)
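
The mixing itself lives in StreamPeerConnectionFactory's AudioBufferCallback, which is outside this diff. For intuition, a minimal sketch of how two 16-bit PCM buffers are typically mixed with clipping protection (illustrative only; the function name and in-place mutation are assumptions, not the SDK's actual implementation):

```kotlin
import java.nio.ByteBuffer
import java.nio.ByteOrder

// Adds two 16-bit little-endian PCM buffers sample by sample, clamping to the
// Short range so overflowing sums saturate instead of wrapping around.
// `mic` is mutated in place; `screen` may hold fewer samples than `mic`.
fun mixScreenIntoMic(mic: ByteBuffer, screen: ByteBuffer) {
    val micSamples = mic.order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
    val screenSamples = screen.order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
    val count = minOf(micSamples.remaining(), screenSamples.remaining())
    for (i in 0 until count) {
        val mixed = micSamples.get(i) + screenSamples.get(i)
        micSamples.put(i, mixed.coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt()).toShort())
    }
}
```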
