Skip to content

Commit ca39bd7

Browse files
committed
Fix SSE3 > SSE2
1 parent dd6ecd7 commit ca39bd7

File tree

1 file changed

+1
-4
lines changed

1 file changed

+1
-4
lines changed

modules/yup_dsp/convolution/yup_PartitionedConvolver.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -217,15 +217,13 @@ class PartitionedConvolver::DirectFIR
217217
std::size_t i = 0;
218218

219219
#if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS
220-
// 8-wide AVX2 FMA path
221220
__m256 vacc = _mm256_setzero_ps();
222221
for (; i + 8 <= len; i += 8)
223222
{
224223
__m256 va = _mm256_loadu_ps (a + i);
225224
__m256 vb = _mm256_loadu_ps (b + i);
226225
vacc = _mm256_fmadd_ps (va, vb, vacc);
227226
}
228-
// horizontal add
229227
__m128 low = _mm256_castps256_ps128 (vacc);
230228
__m128 high = _mm256_extractf128_ps (vacc, 1);
231229
__m128 vsum = _mm_add_ps (low, high);
@@ -250,8 +248,7 @@ class PartitionedConvolver::DirectFIR
250248
vacc = _mm_add_ps (vacc, _mm_mul_ps (va, vb));
251249
}
252250
#endif
253-
// horizontal add
254-
__m128 shuf = _mm_movehdup_ps (vacc);
251+
__m128 shuf = _mm_shuffle_ps (vacc, vacc, _MM_SHUFFLE (2, 3, 0, 1));
255252
__m128 sums = _mm_add_ps (vacc, shuf);
256253
shuf = _mm_movehl_ps (shuf, sums);
257254
sums = _mm_add_ss (sums, shuf);

0 commit comments

Comments
 (0)