Skip to content

Commit 77f1378

Browse files
tyler-albert authored and copybara-github committed
feat: Add field for indicating whether or not audio frames are from the loudest speaker
PiperOrigin-RevId: 722391150
1 parent bc7573a commit 77f1378

File tree

3 files changed: +87 -38 lines changed

native_with_state/api/media_api_client_interface.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,7 @@ struct AudioFrame {
121121
int sample_rate;
122122
size_t number_of_channels;
123123
size_t number_of_frames;
124+
bool is_from_loudest_speaker;
124125
/// Contributing source (CSRC) of the current audio frame. This ID is used to
125126
/// identify which participant in the conference generated the frame.
126127
/// Integrators can cross reference this value with values pushed from Meet

native_with_state/internal/conference_media_tracks.cc

Lines changed: 18 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -40,19 +40,32 @@ void ConferenceAudioTrack::OnData(
4040
<< ". Expected 16.";
4141
return;
4242
}
43+
4344
// Audio data is expected to be in PCM format, where each sample is 16 bits.
4445
const auto* pcm_data = reinterpret_cast<const int16_t*>(audio_data);
4546

47+
bool is_from_loudest_speaker = false;
4648
std::optional<uint32_t> csrc;
4749
std::optional<uint32_t> ssrc;
4850
// Audio csrcs and ssrcs are not included in the audio data. Therefore,
4951
// extract them from the RtpReceiver.
5052
for (const auto& rtp_source : receiver_->GetSources()) {
51-
// It is expected that there will be only one CSRC and SSRC per audio frame.
53+
// It is expected that there may be 1 or 2 contributing sources. The
54+
// contributing source corresponding to the participant's audio stream will
55+
// always be present. Meet may also send a contributing source with value
56+
// `kLoudestSpeakerCsrc` to indicate that this audio stream is from the
57+
// loudest speaker.
58+
//
59+
// Knowing the loudest speaker can be useful, as it can be used to determine
60+
// which participant to prioritize when rendering audio or video (although
61+
// other methods may be used as well).
5262
if (rtp_source.source_type() == webrtc::RtpSourceType::CSRC) {
53-
csrc = rtp_source.source_id();
54-
}
55-
if (rtp_source.source_type() == webrtc::RtpSourceType::SSRC) {
63+
if (rtp_source.source_id() == kLoudestSpeakerCsrc) {
64+
is_from_loudest_speaker = true;
65+
} else {
66+
csrc = rtp_source.source_id();
67+
}
68+
} else if (rtp_source.source_type() == webrtc::RtpSourceType::SSRC) {
5669
ssrc = rtp_source.source_id();
5770
}
5871
}
@@ -67,11 +80,6 @@ void ConferenceAudioTrack::OnData(
6780
return;
6881
}
6982

70-
if (*csrc == kLoudestSpeakerCsrc) {
71-
LOG(INFO) << "Ignoring loudest speaker indicator for mid: " << mid_;
72-
return;
73-
}
74-
7583
// Audio data in PCM format is expected to be stored in a contiguous buffer,
7684
// where there are `number_of_channels * number_of_frames` 16-bit samples.
7785
absl::Span<const int16_t> pcm_data_span =
@@ -81,6 +89,7 @@ void ConferenceAudioTrack::OnData(
8189
.sample_rate = sample_rate,
8290
.number_of_channels = number_of_channels,
8391
.number_of_frames = number_of_frames,
92+
.is_from_loudest_speaker = is_from_loudest_speaker,
8493
.contributing_source = csrc.value(),
8594
.synchronization_source = ssrc.value()});
8695
};

native_with_state/internal/conference_media_tracks_test.cc

Lines changed: 68 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727
#include "testing/base/public/mock-log.h"
2828
#include "absl/base/log_severity.h"
2929
#include "native_with_state/api/media_api_client_interface.h"
30-
#include "webrtc/api/rtp_headers.h"
3130
#include "webrtc/api/rtp_packet_info.h"
3231
#include "webrtc/api/rtp_packet_infos.h"
3332
#include "webrtc/api/scoped_refptr.h"
@@ -41,7 +40,6 @@ namespace meet {
4140
namespace {
4241

4342
using ::base_logging::ERROR;
44-
using ::base_logging::INFO;
4543
using ::testing::_;
4644
using ::testing::kDoNotCaptureLogsYet;
4745
using ::testing::MockFunction;
@@ -50,23 +48,71 @@ using ::testing::ScopedMockLog;
5048
using ::testing::SizeIs;
5149
using ::testing::UnorderedElementsAre;
5250

53-
TEST(ConferenceAudioTrackTest, CallsObserverWithAudioFrame) {
54-
auto mock_receiver = rtc::scoped_refptr<webrtc::MockRtpReceiver>(
51+
TEST(ConferenceAudioTrackTest, CallsObserverWithAudioFrameFromLoudestSpeaker) {
52+
rtc::scoped_refptr<webrtc::MockRtpReceiver> mock_receiver(
5553
new webrtc::MockRtpReceiver());
5654
webrtc::RtpSource csrc_rtp_source(
5755
webrtc::Timestamp::Micros(1234567890),
5856
/*source_id=*/123, webrtc::RtpSourceType::CSRC,
5957
/*rtp_timestamp=*/1111111,
60-
{.audio_level = 100,
61-
.absolute_capture_time =
62-
webrtc::AbsoluteCaptureTime(1234567890, 1000000000)});
58+
{.audio_level = 100, .absolute_capture_time = std::nullopt});
59+
webrtc::RtpSource loudest_speaker_csrc_rtp_source(
60+
webrtc::Timestamp::Micros(1234567890),
61+
/*source_id=*/kLoudestSpeakerCsrc, webrtc::RtpSourceType::CSRC,
62+
/*rtp_timestamp=*/1111111,
63+
{.audio_level = 100, .absolute_capture_time = std::nullopt});
6364
webrtc::RtpSource ssrc_rtp_source(
6465
webrtc::Timestamp::Micros(1234567890),
6566
/*source_id=*/456, webrtc::RtpSourceType::SSRC,
6667
/*rtp_timestamp=*/2222222,
67-
{.audio_level = 100,
68-
.absolute_capture_time =
69-
webrtc::AbsoluteCaptureTime(1234567890, 1000000000)});
68+
{.audio_level = 100, .absolute_capture_time = std::nullopt});
69+
EXPECT_CALL(*mock_receiver, GetSources)
70+
.WillOnce(Return(std::vector<webrtc::RtpSource>{
71+
std::move(csrc_rtp_source),
72+
std::move(loudest_speaker_csrc_rtp_source),
73+
std::move(ssrc_rtp_source)}));
74+
MockFunction<void(AudioFrame)> mock_function;
75+
std::optional<AudioFrame> received_frame;
76+
EXPECT_CALL(mock_function, Call)
77+
.WillOnce([&received_frame](AudioFrame frame) {
78+
received_frame = std::move(frame);
79+
});
80+
ConferenceAudioTrack audio_track("mid", mock_receiver,
81+
mock_function.AsStdFunction());
82+
int16_t pcm_data[2 * 100];
83+
84+
audio_track.OnData(pcm_data,
85+
/*bits_per_sample=*/16,
86+
/*sample_rate=*/48000,
87+
/*number_of_channels=*/2,
88+
/*number_of_frames=*/100,
89+
/*absolute_capture_timestamp_ms=*/std::nullopt);
90+
91+
ASSERT_TRUE(received_frame.has_value());
92+
EXPECT_THAT(received_frame->pcm16, SizeIs(100 * 2));
93+
EXPECT_EQ(received_frame->bits_per_sample, 16);
94+
EXPECT_EQ(received_frame->sample_rate, 48000);
95+
EXPECT_EQ(received_frame->number_of_channels, 2);
96+
EXPECT_EQ(received_frame->number_of_frames, 100);
97+
EXPECT_TRUE(received_frame->is_from_loudest_speaker);
98+
EXPECT_EQ(received_frame->contributing_source, 123);
99+
EXPECT_EQ(received_frame->synchronization_source, 456);
100+
}
101+
102+
TEST(ConferenceAudioTrackTest,
103+
CallsObserverWithAudioFrameFromNonLoudestSpeaker) {
104+
rtc::scoped_refptr<webrtc::MockRtpReceiver> mock_receiver(
105+
new webrtc::MockRtpReceiver());
106+
webrtc::RtpSource csrc_rtp_source(
107+
webrtc::Timestamp::Micros(1234567890),
108+
/*source_id=*/123, webrtc::RtpSourceType::CSRC,
109+
/*rtp_timestamp=*/1111111,
110+
{.audio_level = 100, .absolute_capture_time = std::nullopt});
111+
webrtc::RtpSource ssrc_rtp_source(
112+
webrtc::Timestamp::Micros(1234567890),
113+
/*source_id=*/456, webrtc::RtpSourceType::SSRC,
114+
/*rtp_timestamp=*/2222222,
115+
{.audio_level = 100, .absolute_capture_time = std::nullopt});
70116
EXPECT_CALL(*mock_receiver, GetSources)
71117
.WillOnce(Return(std::vector<webrtc::RtpSource>{
72118
std::move(csrc_rtp_source), std::move(ssrc_rtp_source)}));
@@ -93,6 +139,7 @@ TEST(ConferenceAudioTrackTest, CallsObserverWithAudioFrame) {
93139
EXPECT_EQ(received_frame->sample_rate, 48000);
94140
EXPECT_EQ(received_frame->number_of_channels, 2);
95141
EXPECT_EQ(received_frame->number_of_frames, 100);
142+
EXPECT_FALSE(received_frame->is_from_loudest_speaker);
96143
EXPECT_EQ(received_frame->contributing_source, 123);
97144
EXPECT_EQ(received_frame->synchronization_source, 456);
98145
}
@@ -119,15 +166,13 @@ TEST(ConferenceAudioTrackTest, LogsErrorWithUnsupportedBitsPerSample) {
119166
}
120167

121168
TEST(ConferenceAudioTrackTest, LogsErrorWithMissingCsrc) {
122-
auto mock_receiver = rtc::scoped_refptr<webrtc::MockRtpReceiver>(
169+
rtc::scoped_refptr<webrtc::MockRtpReceiver> mock_receiver(
123170
new webrtc::MockRtpReceiver());
124171
webrtc::RtpSource ssrc_rtp_source(
125172
webrtc::Timestamp::Micros(1234567890),
126173
/*source_id=*/456, webrtc::RtpSourceType::SSRC,
127174
/*rtp_timestamp=*/2222222,
128-
{.audio_level = 100,
129-
.absolute_capture_time =
130-
webrtc::AbsoluteCaptureTime(1234567890, 1000000000)});
175+
{.audio_level = 0, .absolute_capture_time = std::nullopt});
131176
EXPECT_CALL(*mock_receiver, GetSources)
132177
.WillOnce(
133178
Return(std::vector<webrtc::RtpSource>{std::move(ssrc_rtp_source)}));
@@ -153,15 +198,13 @@ TEST(ConferenceAudioTrackTest, LogsErrorWithMissingCsrc) {
153198
}
154199

155200
TEST(ConferenceAudioTrackTest, LogsErrorWithMissingSsrc) {
156-
auto mock_receiver = rtc::scoped_refptr<webrtc::MockRtpReceiver>(
201+
rtc::scoped_refptr<webrtc::MockRtpReceiver> mock_receiver(
157202
new webrtc::MockRtpReceiver());
158203
webrtc::RtpSource csrc_rtp_source(
159204
webrtc::Timestamp::Micros(1234567890),
160205
/*source_id=*/123, webrtc::RtpSourceType::CSRC,
161206
/*rtp_timestamp=*/1111111,
162-
{.audio_level = 100,
163-
.absolute_capture_time =
164-
webrtc::AbsoluteCaptureTime(1234567890, 1000000000)});
207+
{.audio_level = 100, .absolute_capture_time = std::nullopt});
165208
EXPECT_CALL(*mock_receiver, GetSources)
166209
.WillOnce(
167210
Return(std::vector<webrtc::RtpSource>{std::move(csrc_rtp_source)}));
@@ -186,7 +229,7 @@ TEST(ConferenceAudioTrackTest, LogsErrorWithMissingSsrc) {
186229
EXPECT_EQ(message, "AudioFrame is missing SSRC for mid: mid");
187230
}
188231
TEST(ConferenceAudioTrackTest, LogsErrorWithMissingCsrcAndSsrc) {
189-
auto mock_receiver = rtc::scoped_refptr<webrtc::MockRtpReceiver>(
232+
rtc::scoped_refptr<webrtc::MockRtpReceiver> mock_receiver(
190233
new webrtc::MockRtpReceiver());
191234
EXPECT_CALL(*mock_receiver, GetSources)
192235
.WillOnce(Return(std::vector<webrtc::RtpSource>()));
@@ -215,31 +258,27 @@ TEST(ConferenceAudioTrackTest, LogsErrorWithMissingCsrcAndSsrc) {
215258
"AudioFrame is missing SSRC for mid: mid"));
216259
}
217260

218-
TEST(ConferenceAudioTrackTest, LogsIgnoringLoudestParticipantIndicator) {
219-
auto mock_receiver = rtc::scoped_refptr<webrtc::MockRtpReceiver>(
261+
TEST(ConferenceAudioTrackTest, LogsErrorWithOnlyLoudestSpeakerCsrc) {
262+
rtc::scoped_refptr<webrtc::MockRtpReceiver> mock_receiver(
220263
new webrtc::MockRtpReceiver());
221264
webrtc::RtpSource csrc_rtp_source(
222265
webrtc::Timestamp::Micros(1234567890),
223266
/*source_id=*/kLoudestSpeakerCsrc, webrtc::RtpSourceType::CSRC,
224267
/*rtp_timestamp=*/1111111,
225-
{.audio_level = 100,
226-
.absolute_capture_time =
227-
webrtc::AbsoluteCaptureTime(1234567890, 1000000000)});
268+
{.audio_level = 100, .absolute_capture_time = std::nullopt});
228269
webrtc::RtpSource ssrc_rtp_source(
229270
webrtc::Timestamp::Micros(1234567890),
230271
/*source_id=*/456, webrtc::RtpSourceType::SSRC,
231272
/*rtp_timestamp=*/2222222,
232-
{.audio_level = 100,
233-
.absolute_capture_time =
234-
webrtc::AbsoluteCaptureTime(1234567890, 1000000000)});
273+
{.audio_level = 100, .absolute_capture_time = std::nullopt});
235274
EXPECT_CALL(*mock_receiver, GetSources)
236275
.WillOnce(Return(std::vector<webrtc::RtpSource>{
237276
std::move(csrc_rtp_source), std::move(ssrc_rtp_source)}));
238277
ConferenceAudioTrack audio_track("mid", mock_receiver,
239278
[](AudioFrame /*frame*/) {});
240279
ScopedMockLog log(kDoNotCaptureLogsYet);
241280
std::string message;
242-
EXPECT_CALL(log, Log(INFO, _, _))
281+
EXPECT_CALL(log, Log(ERROR, _, _))
243282
.WillOnce([&message](int, const std::string &, const std::string &msg) {
244283
message = msg;
245284
});
@@ -253,7 +292,7 @@ TEST(ConferenceAudioTrackTest, LogsIgnoringLoudestParticipantIndicator) {
253292
/*number_of_frames=*/100,
254293
/*absolute_capture_timestamp_ms=*/std::nullopt);
255294

256-
EXPECT_EQ(message, "Ignoring loudest speaker indicator for mid: mid");
295+
EXPECT_EQ(message, "AudioFrame is missing CSRC for mid: mid");
257296
}
258297

259298
TEST(ConferenceVideoTrackTest, CallsObserverWithVideoFrame) {

0 commit comments

Comments
 (0)