Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions native_with_state/api/media_api_client_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ struct AudioFrame {
int sample_rate;
size_t number_of_channels;
size_t number_of_frames;
bool is_from_loudest_speaker;
/// Contributing source (CSRC) of the current audio frame. This ID is used to
/// identify which participant in the conference generated the frame.
/// Integrators can cross reference this value with values pushed from Meet
Expand Down
27 changes: 18 additions & 9 deletions native_with_state/internal/conference_media_tracks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,32 @@ void ConferenceAudioTrack::OnData(
<< ". Expected 16.";
return;
}

// Audio data is expected to be in PCM format, where each sample is 16 bits.
const auto* pcm_data = reinterpret_cast<const int16_t*>(audio_data);

bool is_from_loudest_speaker = false;
std::optional<uint32_t> csrc;
std::optional<uint32_t> ssrc;
// Audio csrcs and ssrcs are not included in the audio data. Therefore,
// extract them from the RtpReceiver.
for (const auto& rtp_source : receiver_->GetSources()) {
// It is expected that there will be only one CSRC and SSRC per audio frame.
// It is expected that there may be 1 or 2 contributing sources. The
// contributing source corresponding to the participant's audio stream will
// always be present. Meet may also send a contributing source with value
// `kLoudestSpeakerCsrc` to indicate that this audio stream is from the
// loudest speaker.
//
// Knowing which stream carries the loudest speaker helps decide which
// participant to prioritize when rendering audio or video (although other
// methods may be used as well).
if (rtp_source.source_type() == webrtc::RtpSourceType::CSRC) {
csrc = rtp_source.source_id();
}
if (rtp_source.source_type() == webrtc::RtpSourceType::SSRC) {
if (rtp_source.source_id() == kLoudestSpeakerCsrc) {
is_from_loudest_speaker = true;
} else {
csrc = rtp_source.source_id();
}
} else if (rtp_source.source_type() == webrtc::RtpSourceType::SSRC) {
ssrc = rtp_source.source_id();
}
}
Expand All @@ -67,11 +80,6 @@ void ConferenceAudioTrack::OnData(
return;
}

if (*csrc == kLoudestSpeakerCsrc) {
LOG(INFO) << "Ignoring loudest speaker indicator for mid: " << mid_;
return;
}

// Audio data in PCM format is expected to be stored in a contiguous buffer
// holding `number_of_channels * number_of_frames` 16-bit samples.
absl::Span<const int16_t> pcm_data_span =
Expand All @@ -81,6 +89,7 @@ void ConferenceAudioTrack::OnData(
.sample_rate = sample_rate,
.number_of_channels = number_of_channels,
.number_of_frames = number_of_frames,
.is_from_loudest_speaker = is_from_loudest_speaker,
.contributing_source = csrc.value(),
.synchronization_source = ssrc.value()});
};
Expand Down
97 changes: 68 additions & 29 deletions native_with_state/internal/conference_media_tracks_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
#include "testing/base/public/mock-log.h"
#include "absl/base/log_severity.h"
#include "native_with_state/api/media_api_client_interface.h"
#include "webrtc/api/rtp_headers.h"
#include "webrtc/api/rtp_packet_info.h"
#include "webrtc/api/rtp_packet_infos.h"
#include "webrtc/api/scoped_refptr.h"
Expand All @@ -41,7 +40,6 @@ namespace meet {
namespace {

using ::base_logging::ERROR;
using ::base_logging::INFO;
using ::testing::_;
using ::testing::kDoNotCaptureLogsYet;
using ::testing::MockFunction;
Expand All @@ -50,23 +48,71 @@ using ::testing::ScopedMockLog;
using ::testing::SizeIs;
using ::testing::UnorderedElementsAre;

TEST(ConferenceAudioTrackTest, CallsObserverWithAudioFrame) {
auto mock_receiver = rtc::scoped_refptr<webrtc::MockRtpReceiver>(
TEST(ConferenceAudioTrackTest, CallsObserverWithAudioFrameFromLoudestSpeaker) {
rtc::scoped_refptr<webrtc::MockRtpReceiver> mock_receiver(
new webrtc::MockRtpReceiver());
webrtc::RtpSource csrc_rtp_source(
webrtc::Timestamp::Micros(1234567890),
/*source_id=*/123, webrtc::RtpSourceType::CSRC,
/*rtp_timestamp=*/1111111,
{.audio_level = 100,
.absolute_capture_time =
webrtc::AbsoluteCaptureTime(1234567890, 1000000000)});
{.audio_level = 100, .absolute_capture_time = std::nullopt});
webrtc::RtpSource loudest_speaker_csrc_rtp_source(
webrtc::Timestamp::Micros(1234567890),
/*source_id=*/kLoudestSpeakerCsrc, webrtc::RtpSourceType::CSRC,
/*rtp_timestamp=*/1111111,
{.audio_level = 100, .absolute_capture_time = std::nullopt});
webrtc::RtpSource ssrc_rtp_source(
webrtc::Timestamp::Micros(1234567890),
/*source_id=*/456, webrtc::RtpSourceType::SSRC,
/*rtp_timestamp=*/2222222,
{.audio_level = 100,
.absolute_capture_time =
webrtc::AbsoluteCaptureTime(1234567890, 1000000000)});
{.audio_level = 100, .absolute_capture_time = std::nullopt});
EXPECT_CALL(*mock_receiver, GetSources)
.WillOnce(Return(std::vector<webrtc::RtpSource>{
std::move(csrc_rtp_source),
std::move(loudest_speaker_csrc_rtp_source),
std::move(ssrc_rtp_source)}));
MockFunction<void(AudioFrame)> mock_function;
std::optional<AudioFrame> received_frame;
EXPECT_CALL(mock_function, Call)
.WillOnce([&received_frame](AudioFrame frame) {
received_frame = std::move(frame);
});
ConferenceAudioTrack audio_track("mid", mock_receiver,
mock_function.AsStdFunction());
int16_t pcm_data[2 * 100];

audio_track.OnData(pcm_data,
/*bits_per_sample=*/16,
/*sample_rate=*/48000,
/*number_of_channels=*/2,
/*number_of_frames=*/100,
/*absolute_capture_timestamp_ms=*/std::nullopt);

ASSERT_TRUE(received_frame.has_value());
EXPECT_THAT(received_frame->pcm16, SizeIs(100 * 2));
EXPECT_EQ(received_frame->bits_per_sample, 16);
EXPECT_EQ(received_frame->sample_rate, 48000);
EXPECT_EQ(received_frame->number_of_channels, 2);
EXPECT_EQ(received_frame->number_of_frames, 100);
EXPECT_TRUE(received_frame->is_from_loudest_speaker);
EXPECT_EQ(received_frame->contributing_source, 123);
EXPECT_EQ(received_frame->synchronization_source, 456);
}

TEST(ConferenceAudioTrackTest,
CallsObserverWithAudioFrameFromNonLoudestSpeaker) {
rtc::scoped_refptr<webrtc::MockRtpReceiver> mock_receiver(
new webrtc::MockRtpReceiver());
webrtc::RtpSource csrc_rtp_source(
webrtc::Timestamp::Micros(1234567890),
/*source_id=*/123, webrtc::RtpSourceType::CSRC,
/*rtp_timestamp=*/1111111,
{.audio_level = 100, .absolute_capture_time = std::nullopt});
webrtc::RtpSource ssrc_rtp_source(
webrtc::Timestamp::Micros(1234567890),
/*source_id=*/456, webrtc::RtpSourceType::SSRC,
/*rtp_timestamp=*/2222222,
{.audio_level = 100, .absolute_capture_time = std::nullopt});
EXPECT_CALL(*mock_receiver, GetSources)
.WillOnce(Return(std::vector<webrtc::RtpSource>{
std::move(csrc_rtp_source), std::move(ssrc_rtp_source)}));
Expand All @@ -93,6 +139,7 @@ TEST(ConferenceAudioTrackTest, CallsObserverWithAudioFrame) {
EXPECT_EQ(received_frame->sample_rate, 48000);
EXPECT_EQ(received_frame->number_of_channels, 2);
EXPECT_EQ(received_frame->number_of_frames, 100);
EXPECT_FALSE(received_frame->is_from_loudest_speaker);
EXPECT_EQ(received_frame->contributing_source, 123);
EXPECT_EQ(received_frame->synchronization_source, 456);
}
Expand All @@ -119,15 +166,13 @@ TEST(ConferenceAudioTrackTest, LogsErrorWithUnsupportedBitsPerSample) {
}

TEST(ConferenceAudioTrackTest, LogsErrorWithMissingCsrc) {
auto mock_receiver = rtc::scoped_refptr<webrtc::MockRtpReceiver>(
rtc::scoped_refptr<webrtc::MockRtpReceiver> mock_receiver(
new webrtc::MockRtpReceiver());
webrtc::RtpSource ssrc_rtp_source(
webrtc::Timestamp::Micros(1234567890),
/*source_id=*/456, webrtc::RtpSourceType::SSRC,
/*rtp_timestamp=*/2222222,
{.audio_level = 100,
.absolute_capture_time =
webrtc::AbsoluteCaptureTime(1234567890, 1000000000)});
{.audio_level = 0, .absolute_capture_time = std::nullopt});
EXPECT_CALL(*mock_receiver, GetSources)
.WillOnce(
Return(std::vector<webrtc::RtpSource>{std::move(ssrc_rtp_source)}));
Expand All @@ -153,15 +198,13 @@ TEST(ConferenceAudioTrackTest, LogsErrorWithMissingCsrc) {
}

TEST(ConferenceAudioTrackTest, LogsErrorWithMissingSsrc) {
auto mock_receiver = rtc::scoped_refptr<webrtc::MockRtpReceiver>(
rtc::scoped_refptr<webrtc::MockRtpReceiver> mock_receiver(
new webrtc::MockRtpReceiver());
webrtc::RtpSource csrc_rtp_source(
webrtc::Timestamp::Micros(1234567890),
/*source_id=*/123, webrtc::RtpSourceType::CSRC,
/*rtp_timestamp=*/1111111,
{.audio_level = 100,
.absolute_capture_time =
webrtc::AbsoluteCaptureTime(1234567890, 1000000000)});
{.audio_level = 100, .absolute_capture_time = std::nullopt});
EXPECT_CALL(*mock_receiver, GetSources)
.WillOnce(
Return(std::vector<webrtc::RtpSource>{std::move(csrc_rtp_source)}));
Expand All @@ -186,7 +229,7 @@ TEST(ConferenceAudioTrackTest, LogsErrorWithMissingSsrc) {
EXPECT_EQ(message, "AudioFrame is missing SSRC for mid: mid");
}
TEST(ConferenceAudioTrackTest, LogsErrorWithMissingCsrcAndSsrc) {
auto mock_receiver = rtc::scoped_refptr<webrtc::MockRtpReceiver>(
rtc::scoped_refptr<webrtc::MockRtpReceiver> mock_receiver(
new webrtc::MockRtpReceiver());
EXPECT_CALL(*mock_receiver, GetSources)
.WillOnce(Return(std::vector<webrtc::RtpSource>()));
Expand Down Expand Up @@ -215,31 +258,27 @@ TEST(ConferenceAudioTrackTest, LogsErrorWithMissingCsrcAndSsrc) {
"AudioFrame is missing SSRC for mid: mid"));
}

TEST(ConferenceAudioTrackTest, LogsIgnoringLoudestParticipantIndicator) {
auto mock_receiver = rtc::scoped_refptr<webrtc::MockRtpReceiver>(
TEST(ConferenceAudioTrackTest, LogsErrorWithOnlyLoudestSpeakerCsrc) {
rtc::scoped_refptr<webrtc::MockRtpReceiver> mock_receiver(
new webrtc::MockRtpReceiver());
webrtc::RtpSource csrc_rtp_source(
webrtc::Timestamp::Micros(1234567890),
/*source_id=*/kLoudestSpeakerCsrc, webrtc::RtpSourceType::CSRC,
/*rtp_timestamp=*/1111111,
{.audio_level = 100,
.absolute_capture_time =
webrtc::AbsoluteCaptureTime(1234567890, 1000000000)});
{.audio_level = 100, .absolute_capture_time = std::nullopt});
webrtc::RtpSource ssrc_rtp_source(
webrtc::Timestamp::Micros(1234567890),
/*source_id=*/456, webrtc::RtpSourceType::SSRC,
/*rtp_timestamp=*/2222222,
{.audio_level = 100,
.absolute_capture_time =
webrtc::AbsoluteCaptureTime(1234567890, 1000000000)});
{.audio_level = 100, .absolute_capture_time = std::nullopt});
EXPECT_CALL(*mock_receiver, GetSources)
.WillOnce(Return(std::vector<webrtc::RtpSource>{
std::move(csrc_rtp_source), std::move(ssrc_rtp_source)}));
ConferenceAudioTrack audio_track("mid", mock_receiver,
[](AudioFrame /*frame*/) {});
ScopedMockLog log(kDoNotCaptureLogsYet);
std::string message;
EXPECT_CALL(log, Log(INFO, _, _))
EXPECT_CALL(log, Log(ERROR, _, _))
.WillOnce([&message](int, const std::string &, const std::string &msg) {
message = msg;
});
Expand All @@ -253,7 +292,7 @@ TEST(ConferenceAudioTrackTest, LogsIgnoringLoudestParticipantIndicator) {
/*number_of_frames=*/100,
/*absolute_capture_timestamp_ms=*/std::nullopt);

EXPECT_EQ(message, "Ignoring loudest speaker indicator for mid: mid");
EXPECT_EQ(message, "AudioFrame is missing CSRC for mid: mid");
}

TEST(ConferenceVideoTrackTest, CallsObserverWithVideoFrame) {
Expand Down