Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit 8cb0d9d

Browse files
peah authored and Commit bot committed
Merge of Echo canceller 3 improvements for setups with headsets.
This is a merge of the CL https://codereview.webrtc.org/2823903003 into M59.

This CL improves the echo cancellation performance on setups where headsets are used (systems with such low echo path gain that no correlation between the render and capture signals can be found) in 4 ways:
1) The echo path gain for systems with headsets is assumed to be nonzero.
2) The stationary component of the render power is not included in the nonlinear echo power estimate.
3) The behavior after echo path gain changes is made less cautious.
4) The detection of systems with headsets is made more rapid.

NOTRY=true
NOPRESUBMIT=true
BUG=chromium:712651, webrtc:6018

Review-Url: https://codereview.webrtc.org/2823903003
Cr-Commit-Position: refs/heads/master@{#17768}
(cherry picked from commit e52a203)

Review-Url: https://codereview.webrtc.org/2833353002
Cr-Commit-Position: refs/branch-heads/59@{#5}
Cr-Branched-From: 10d095d-refs/heads/master@{#17657}
1 parent 6d56d2e commit 8cb0d9d

File tree

4 files changed

+78
-27
lines changed

4 files changed

+78
-27
lines changed

webrtc/modules/audio_processing/aec3/aec_state.cc

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@
2222
namespace webrtc {
2323
namespace {
2424

25-
constexpr size_t kEchoPathChangeConvergenceBlocks = 4 * kNumBlocksPerSecond;
25+
constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond;
2626
constexpr size_t kSaturationLeakageBlocks = 20;
2727

2828
// Computes delay of the adaptive filter.
@@ -89,7 +89,6 @@ void AecState::HandleEchoPathChange(
8989
const EchoPathVariability& echo_path_variability) {
9090
if (echo_path_variability.AudioPathChanged()) {
9191
blocks_since_last_saturation_ = 0;
92-
active_render_blocks_ = 0;
9392
usable_linear_estimate_ = false;
9493
echo_leakage_detected_ = false;
9594
capture_signal_saturation_ = false;
@@ -98,6 +97,8 @@ void AecState::HandleEchoPathChange(
9897

9998
if (echo_path_variability.delay_change) {
10099
force_zero_gain_counter_ = 0;
100+
blocks_with_filter_adaptation_ = 0;
101+
render_received_ = false;
101102
force_zero_gain_ = true;
102103
echo_path_change_counter_ = kEchoPathChangeCounterMax;
103104
}
@@ -121,7 +122,11 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
121122
// Update counters.
122123
const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
123124
const bool active_render_block = x_energy > 10000.f * kFftLengthBy2;
124-
active_render_blocks_ += active_render_block ? 1 : 0;
125+
if (active_render_block) {
126+
render_received_ = true;
127+
}
128+
blocks_with_filter_adaptation_ +=
129+
(active_render_block && (!SaturatedCapture()) ? 1 : 0);
125130
--echo_path_change_counter_;
126131

127132
// Force zero echo suppression gain after an echo path change to allow at
@@ -145,6 +150,8 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
145150
}
146151

147152
// Detect and flag echo saturation.
153+
// TODO(peah): Add the delay in this computation to ensure that the render and
154+
// capture signals are properly aligned.
148155
RTC_DCHECK_LT(0, x.size());
149156
const float max_sample = fabs(*std::max_element(
150157
x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
@@ -160,14 +167,17 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
160167
// Flag whether the linear filter estimate is usable.
161168
usable_linear_estimate_ =
162169
(!echo_saturation_) &&
163-
active_render_blocks_ > kEchoPathChangeConvergenceBlocks &&
170+
(!render_received_ ||
171+
blocks_with_filter_adaptation_ > kEchoPathChangeConvergenceBlocks) &&
164172
filter_delay_ && echo_path_change_counter_ <= 0;
165173

166174
// After an amount of active render samples for which an echo should have been
167175
// detected in the capture signal if the ERL was not infinite, flag that a
168176
// headset is used.
169-
headset_detected_ = !external_delay_ && !filter_delay_ &&
170-
active_render_blocks_ >= kEchoPathChangeConvergenceBlocks;
177+
headset_detected_ =
178+
!external_delay_ && !filter_delay_ &&
179+
(!render_received_ ||
180+
blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks);
171181
}
172182

173183
} // namespace webrtc

webrtc/modules/audio_processing/aec3/aec_state.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,8 @@ class AecState {
4141
bool EchoLeakageDetected() const { return echo_leakage_detected_; }
4242

4343
// Returns whether the render signal is currently active.
44-
bool ActiveRender() const { return active_render_blocks_ > 200; }
44+
// TODO(peah): Deprecate this in an upcoming CL.
45+
bool ActiveRender() const { return blocks_with_filter_adaptation_ > 200; }
4546

4647
// Returns the ERLE.
4748
const std::array<float, kFftLengthBy2Plus1>& Erle() const {
@@ -99,14 +100,15 @@ class AecState {
99100
ErlEstimator erl_estimator_;
100101
ErleEstimator erle_estimator_;
101102
int echo_path_change_counter_;
102-
size_t active_render_blocks_ = 0;
103+
size_t blocks_with_filter_adaptation_ = 0;
103104
bool usable_linear_estimate_ = false;
104105
bool echo_leakage_detected_ = false;
105106
bool capture_signal_saturation_ = false;
106107
bool echo_saturation_ = false;
107108
bool headset_detected_ = false;
108109
float previous_max_sample_ = 0.f;
109110
bool force_zero_gain_ = false;
111+
bool render_received_ = false;
110112
size_t force_zero_gain_counter_ = 0;
111113
rtc::Optional<size_t> filter_delay_;
112114
rtc::Optional<size_t> external_delay_;

webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc

Lines changed: 54 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,43 @@ void EchoGeneratingPower(const RenderBuffer& render_buffer,
4040
});
4141
}
4242

43+
constexpr int kNoiseFloorCounterMax = 50;
44+
constexpr float kNoiseFloorMin = 10.f * 10.f * 128.f * 128.f;
45+
46+
// Updates estimate for the power of the stationary noise component in the
47+
// render signal.
48+
void RenderNoisePower(
49+
const RenderBuffer& render_buffer,
50+
std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
51+
std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) {
52+
RTC_DCHECK(X2_noise_floor);
53+
RTC_DCHECK(X2_noise_floor_counter);
54+
55+
const auto render_power = render_buffer.Spectrum(0);
56+
RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size());
57+
RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size());
58+
59+
// Estimate the stationary noise power in a minimum statistics manner.
60+
for (size_t k = 0; k < render_power.size(); ++k) {
61+
// Decrease rapidly.
62+
if (render_power[k] < (*X2_noise_floor)[k]) {
63+
(*X2_noise_floor)[k] = render_power[k];
64+
(*X2_noise_floor_counter)[k] = 0;
65+
} else {
66+
// Increase in a delayed, leaky manner.
67+
if ((*X2_noise_floor_counter)[k] >= kNoiseFloorCounterMax) {
68+
(*X2_noise_floor)[k] =
69+
std::max((*X2_noise_floor)[k] * 1.1f, kNoiseFloorMin);
70+
} else {
71+
++(*X2_noise_floor_counter)[k];
72+
}
73+
}
74+
}
75+
}
76+
77+
// Assume a minimum echo path gain of -33 dB for headsets.
78+
constexpr float kHeadsetEchoPathGain = 0.0005f;
79+
4380
} // namespace
4481

4582
ResidualEchoEstimator::ResidualEchoEstimator() {
@@ -57,28 +94,19 @@ void ResidualEchoEstimator::Estimate(
5794
std::array<float, kFftLengthBy2Plus1>* R2) {
5895
RTC_DCHECK(R2);
5996

60-
// Return zero residual echo power when a headset is detected.
61-
if (aec_state.HeadsetDetected()) {
62-
if (!headset_detected_cached_) {
63-
Reset();
64-
headset_detected_cached_ = true;
65-
}
66-
R2->fill(0.f);
67-
return;
68-
} else {
69-
headset_detected_cached_ = false;
70-
}
71-
7297
const rtc::Optional<size_t> delay =
7398
aec_state.FilterDelay()
7499
? aec_state.FilterDelay()
75100
: (aec_state.ExternalDelay() ? aec_state.ExternalDelay()
76101
: rtc::Optional<size_t>());
77102

103+
// Estimate the power of the stationary noise in the render signal.
104+
RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_);
105+
78106
// Estimate the residual echo power.
79107
const bool use_linear_echo_power =
80108
aec_state.UsableLinearEstimate() && using_subtractor_output;
81-
if (use_linear_echo_power) {
109+
if (use_linear_echo_power && !aec_state.HeadsetDetected()) {
82110
RTC_DCHECK(aec_state.FilterDelay());
83111
const int filter_delay = *aec_state.FilterDelay();
84112
LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
@@ -102,7 +130,15 @@ void ResidualEchoEstimator::Estimate(
102130
kResidualEchoPowerRenderWindowSize - 1, &X2);
103131
}
104132

105-
NonLinearEstimate(X2, Y2, R2);
133+
// Subtract the stationary noise power to avoid stationary noise causing
134+
// excessive echo suppression.
135+
std::transform(
136+
X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
137+
[](float a, float b) { return std::max(0.f, a - 10.f * b); });
138+
139+
NonLinearEstimate(
140+
aec_state.HeadsetDetected() ? kHeadsetEchoPathGain : kFixedEchoPathGain,
141+
X2, Y2, R2);
106142
AddEchoReverb(*R2, aec_state.SaturatedEcho(),
107143
std::min(static_cast<size_t>(kAdaptiveFilterLength),
108144
delay.value_or(kAdaptiveFilterLength)),
@@ -119,6 +155,8 @@ void ResidualEchoEstimator::Estimate(
119155
}
120156

121157
void ResidualEchoEstimator::Reset() {
158+
X2_noise_floor_counter_.fill(kNoiseFloorCounterMax);
159+
X2_noise_floor_.fill(kNoiseFloorMin);
122160
R2_reverb_.fill(0.f);
123161
R2_old_.fill(0.f);
124162
R2_hold_counter_.fill(0.f);
@@ -141,14 +179,13 @@ void ResidualEchoEstimator::LinearEstimate(
141179
}
142180

143181
void ResidualEchoEstimator::NonLinearEstimate(
182+
float echo_path_gain,
144183
const std::array<float, kFftLengthBy2Plus1>& X2,
145184
const std::array<float, kFftLengthBy2Plus1>& Y2,
146185
std::array<float, kFftLengthBy2Plus1>* R2) {
147186
// Compute preliminary residual echo.
148-
// TODO(peah): Try to make this adaptive. Currently the gain is hardcoded to
149-
// 20 dB.
150187
std::transform(X2.begin(), X2.end(), R2->begin(),
151-
[](float a) { return a * kFixedEchoPathGain; });
188+
[echo_path_gain](float a) { return a * echo_path_gain; });
152189

153190
for (size_t k = 0; k < R2->size(); ++k) {
154191
// Update hold counter.

webrtc/modules/audio_processing/aec3/residual_echo_estimator.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,8 @@ class ResidualEchoEstimator {
4848

4949
// Estimates the residual echo power based on the estimate of the echo path
5050
// gain.
51-
void NonLinearEstimate(const std::array<float, kFftLengthBy2Plus1>& X2,
51+
void NonLinearEstimate(float echo_path_gain,
52+
const std::array<float, kFftLengthBy2Plus1>& X2,
5253
const std::array<float, kFftLengthBy2Plus1>& Y2,
5354
std::array<float, kFftLengthBy2Plus1>* R2);
5455

@@ -66,7 +67,8 @@ class ResidualEchoEstimator {
6667
int S2_old_index_ = 0;
6768
std::array<std::array<float, kFftLengthBy2Plus1>, kAdaptiveFilterLength>
6869
S2_old_;
69-
bool headset_detected_cached_ = false;
70+
std::array<float, kFftLengthBy2Plus1> X2_noise_floor_;
71+
std::array<int, kFftLengthBy2Plus1> X2_noise_floor_counter_;
7072

7173
RTC_DISALLOW_COPY_AND_ASSIGN(ResidualEchoEstimator);
7274
};

0 commit comments

Comments
 (0)