@@ -32,7 +32,7 @@ namespace yup
3232 @param Y pointer to output complex array (accumulated)
3333 @param complexPairs number of complex pairs (not number of floats!)
3434*/
35- static void complexMultiplyAccumulate (const float * A, const float * B, float * Y, int complexPairs) noexcept
35+ static void complexMultiplyAccumulate (const float * __restrict A, const float * __restrict B, float * __restrict Y, int complexPairs) noexcept
3636{
3737 int i = 0 ;
3838
@@ -125,147 +125,6 @@ static void complexMultiplyAccumulate (const float* A, const float* B, float* Y,
125125
126126// ==============================================================================
127127
// Time-domain FIR stage of PartitionedConvolver (every line of this class is a removed line in this diff).
// Keeps the scaled taps in reversed order plus a double-length history buffer so that each output
// sample is a single contiguous dot product of length numTaps.
128- class PartitionedConvolver ::DirectFIR
129- {
130- public:
131- DirectFIR () = default ;
132-
// Installs a new set of taps.
// @param taps     filter taps, taken by value; scaled in place and then moved into tapsReversed
// @param scaling  gain multiplied into every tap before it is stored
// Also zeroes the history buffer (2 * numTaps entries) and rewinds writeIndex.
133- void setTaps (std::vector<float > taps, float scaling)
134- {
135- FloatVectorOperations::multiply (taps.data (), scaling, taps.size ());
136-
137- tapsReversed = std::move (taps);
138- std::reverse (tapsReversed.begin (), tapsReversed.end ());
139-
140- numTaps = tapsReversed.size ();
// Round up to a multiple of 4 and zero-pad the stored taps to that length.
// NOTE(review): ~3u is a 32-bit unsigned mask; and-ing it with a 64-bit std::size_t also clears the upper 32 bits - confirm tap counts stay far below 2^32.
141- paddedLen = (numTaps + 3u ) & ~3u ;
142- tapsReversed.resize (paddedLen, 0 .0f );
143-
144- history.assign (2 * numTaps, 0 .0f );
145- writeIndex = 0 ;
146- }
147-
// Clears the history buffer and rewinds writeIndex; the taps are left untouched.
148- void reset ()
149- {
150- std::fill (history.begin (), history.end (), 0 .0f );
151- writeIndex = 0 ;
152- }
153-
// Filters numSamples samples from input and ADDS the result into output (output[i] += sum).
// Does nothing when no taps are installed.
154- void process (const float * input, float * output, std::size_t numSamples) noexcept
155- {
156- const std::size_t M = numTaps;
157- if (M == 0 )
158- return ;
159-
160- const float * h = tapsReversed.data ();
161- for (std::size_t i = 0 ; i < numSamples; ++i)
162- {
163- const float x = input[i];
164-
// Each sample is written twice, M apart, so the M entries starting at writeIndex + 1
// are always contiguous in memory and end with the sample just written.
165- history[writeIndex] = x;
166- history[writeIndex + M] = x;
167-
168- const float * w = history.data () + writeIndex + 1 ;
169-
170- float sum = 0 .0f ;
171-
// Dot product over M elements (numTaps, not paddedLen) of the history window and the reversed taps.
172- #if YUP_ENABLE_VDSP
173- vDSP_dotpr (w, 1 , h, 1 , &sum, M);
174- #else
175- sum = dotProduct (w, h, M);
176- #endif
177-
178- output[i] += sum;
179-
// Wrap the write position after M samples.
180- if (++writeIndex == M)
181- writeIndex = 0 ;
182- }
183- }
184-
// Returns the tap count recorded by the last setTaps call, before padding.
185- std::size_t getNumTaps () const
186- {
187- return numTaps;
188- }
189-
190- private:
// Returns the sum of a[i] * b[i] for i in [0, len).
// The preprocessor selects one SIMD body (AVX+FMA, SSE with or without FMA, or NEON) that consumes
// full vector-width chunks; the scalar loop at the end handles the remaining elements, or all of
// them when no SIMD macro is enabled. Loads are unaligned on the x86 paths.
191- static float dotProduct (const float * __restrict a, const float * __restrict b, std::size_t len) noexcept
192- {
193- float acc = 0 .0f ;
194- std::size_t i = 0 ;
195-
196- #if YUP_USE_AVX_INTRINSICS && YUP_USE_FMA_INTRINSICS
197- __m256 vacc = _mm256_setzero_ps ();
198- for (; i + 8 <= len; i += 8 )
199- {
200- __m256 va = _mm256_loadu_ps (a + i);
201- __m256 vb = _mm256_loadu_ps (b + i);
202- vacc = _mm256_fmadd_ps (va, vb, vacc);
203- }
// Reduce the 8 lanes: add the two 128-bit halves, then two horizontal adds.
204- __m128 low = _mm256_castps256_ps128 (vacc);
205- __m128 high = _mm256_extractf128_ps (vacc, 1 );
206- __m128 vsum = _mm_add_ps (low, high);
207- vsum = _mm_hadd_ps (vsum, vsum);
208- vsum = _mm_hadd_ps (vsum, vsum);
209- acc += _mm_cvtss_f32 (vsum);
210-
211- #elif YUP_USE_SSE_INTRINSICS
212- __m128 vacc = _mm_setzero_ps ();
213- #if YUP_USE_FMA_INTRINSICS
214- for (; i + 4 <= len; i += 4 )
215- {
216- __m128 va = _mm_loadu_ps (a + i);
217- __m128 vb = _mm_loadu_ps (b + i);
218- vacc = _mm_fmadd_ps (va, vb, vacc);
219- }
220- #else
221- for (; i + 4 <= len; i += 4 )
222- {
223- __m128 va = _mm_loadu_ps (a + i);
224- __m128 vb = _mm_loadu_ps (b + i);
225- vacc = _mm_add_ps (vacc, _mm_mul_ps (va, vb));
226- }
227- #endif
// Reduce the 4 lanes to a scalar with shuffle/add steps.
228- __m128 shuf = _mm_shuffle_ps (vacc, vacc, _MM_SHUFFLE (2 , 3 , 0 , 1 ));
229- __m128 sums = _mm_add_ps (vacc, shuf);
230- shuf = _mm_movehl_ps (shuf, sums);
231- sums = _mm_add_ss (sums, shuf);
232- acc += _mm_cvtss_f32 (sums);
233-
234- #elif YUP_USE_ARM_NEON
235- float32x4_t vacc = vdupq_n_f32 (0 .0f );
236- for (; i + 4 <= len; i += 4 )
237- {
238- float32x4_t va = vld1q_f32 (a + i);
239- float32x4_t vb = vld1q_f32 (b + i);
240- vacc = vmlaq_f32 (vacc, va, vb);
241- }
// Reduce the 4 lanes: a single across-vector add when YUP_64BIT is set, pairwise adds otherwise.
242- #if YUP_64BIT
243- acc += vaddvq_f32 (vacc);
244- #else
245- float32x2_t vlow = vget_low_f32 (vacc);
246- float32x2_t vhigh = vget_high_f32 (vacc);
247- float32x2_t vsum2 = vpadd_f32 (vlow, vhigh);
248- vsum2 = vpadd_f32 (vsum2, vsum2);
249- acc += vget_lane_f32 (vsum2, 0 );
250- #endif
251-
252- #endif
253-
// Scalar tail: elements the vector loop above did not consume.
254- for (; i < len; ++i)
255- acc += a[i] * b[i];
256-
257- return acc;
258- }
259-
// tapsReversed: scaled taps in reversed order, zero-padded to paddedLen entries.
// history:      2 * numTaps entries; every input sample is stored at writeIndex and writeIndex + numTaps.
// numTaps:      number of taps passed to setTaps.
// paddedLen:    numTaps rounded up to a multiple of 4.
// writeIndex:   next write position in history, always in [0, numTaps).
260- std::vector<float > tapsReversed;
261- std::vector<float > history;
262- std::size_t numTaps = 0 ;
263- std::size_t paddedLen = 0 ;
264- std::size_t writeIndex = 0 ;
265- };
266-
267- // ==============================================================================
268-
269128class PartitionedConvolver ::FFTLayer
270129{
271130public:
@@ -544,9 +403,9 @@ class PartitionedConvolver::Impl
544403 Impl () = default ;
545404 ~Impl () = default ;
546405
547- void configureLayers (std::size_t directFIRTaps , const std::vector<LayerSpec>& newLayers)
406+ void configureLayers (std::size_t directFIRCoefficients , const std::vector<LayerSpec>& newLayers)
548407 {
549- directFIRTapCount = directFIRTaps ;
408+ directFIRCoefficientCount = directFIRCoefficients ;
550409
551410 layers.clear ();
552411 layers.resize (newLayers.size ());
@@ -659,6 +518,7 @@ class PartitionedConvolver::Impl
659518 if (significantContentEnd == 0 )
660519 {
661520 const std::size_t checkLength = std::min (minRetainLength, length);
521+
662522 float rmsSquared = 0 .0f ;
663523 for (std::size_t j = 0 ; j < checkLength; ++j)
664524 rmsSquared += impulseResponse[j] * impulseResponse[j];
@@ -685,12 +545,10 @@ class PartitionedConvolver::Impl
685545 // Safety check
686546 if (impulseResponse != nullptr && trimmedLength > 0 )
687547 {
688- // Trim end silence if requested
689- if (options.trimEndSilenceBelowDb )
690- trimmedLength = trimSilenceFromEnd (impulseResponse, length, *options.trimEndSilenceBelowDb );
691-
692548 // Always apply peak headroom
693549 float headroomScale = std::pow (10 .0f , options.headroomDb / 20 .0f );
550+
551+ // Normalize peaks
694552 if (options.normalize )
695553 {
696554 const auto minMax = FloatVectorOperations::findMinAndMax (impulseResponse, trimmedLength);
@@ -700,20 +558,24 @@ class PartitionedConvolver::Impl
700558 headroomScale /= peak;
701559 }
702560
561+ // Trim end silence if requested
562+ if (options.trimEndSilenceBelowDb )
563+ trimmedLength = trimSilenceFromEnd (impulseResponse, length, *options.trimEndSilenceBelowDb );
564+
703565 // Update DirectFIR in-place
704- std::vector<float > directTaps ;
566+ std::vector<float > directCoefficients ;
705567
706- const auto directTapsCount = std::min (directFIRTapCount , trimmedLength);
707- if (directTapsCount > 0 )
568+ const auto directCoefficientsCount = std::min (directFIRCoefficientCount , trimmedLength);
569+ if (directCoefficientsCount > 0 )
708570 {
709- directTaps .reserve (directTapsCount );
710- directTaps .assign (impulseResponse, impulseResponse + directTapsCount );
571+ directCoefficients .reserve (directCoefficientsCount );
572+ directCoefficients .assign (impulseResponse, impulseResponse + directCoefficientsCount );
711573 }
712574
713- newFIR.setTaps (std::move (directTaps ), headroomScale);
575+ newFIR.setCoefficients (std::move (directCoefficients ), headroomScale);
714576
715577 // Update FFT layers
716- std::size_t consumed = directTapsCount ;
578+ std::size_t consumed = directCoefficientsCount ;
717579 for (std::size_t i = 0 ; i < newLayers.size (); ++i)
718580 {
719581 auto & layer = newLayers[i];
@@ -735,11 +597,17 @@ class PartitionedConvolver::Impl
735597
736598 directFIR = std::move (newFIR);
737599 layers = std::move (newLayers);
600+ finalImpulseLength = trimmedLength;
738601
739602 resetStateUnsafe ();
740603 }
741604 }
742605
// Returns finalImpulseLength, which the impulse-response loading code sets to the trimmed length
// when it installs a new direct FIR and layer set.
// NOTE(review): the value is read without taking processingLock - confirm callers cannot race with an impulse-response update.
606+ std::size_t getImpulseLength () const
607+ {
608+ return finalImpulseLength;
609+ }
610+
743611 void reset ()
744612 {
745613 SpinLock::ScopedLockType lock (processingLock);
@@ -890,10 +758,11 @@ class PartitionedConvolver::Impl
890758
// Accessor for the inputStagingAvailable counter (presumably the number of samples waiting in the input staging buffer - TODO confirm).
891759 std::size_t getInputStagingAvailable () const { return inputStagingAvailable; }
892760
893- std::size_t directFIRTapCount = 0 ;
761+ std::size_t directFIRCoefficientCount = 0 ;
894762 int baseHopSize = 0 ;
895763 std::size_t maxHopSize = 0 ;
896764 std::size_t maxBlockSize = 0 ;
765+ std::size_t finalImpulseLength = 0 ;
897766 bool isPrepared = false ;
898767
899768 DirectFIR directFIR;
@@ -941,25 +810,25 @@ PartitionedConvolver& PartitionedConvolver::operator= (PartitionedConvolver&& ot
941810 return *this ;
942811}
943812
// Public entry point that forwards the direct-FIR size and the layer specifications unchanged
// to the implementation object held in pImpl.
944- void PartitionedConvolver::configureLayers (std::size_t directFIRTaps , const std::vector<LayerSpec>& layers)
813+ void PartitionedConvolver::configureLayers (std::size_t directFIRCoefficients , const std::vector<LayerSpec>& layers)
945814{
946- pImpl->configureLayers (directFIRTaps , layers);
815+ pImpl->configureLayers (directFIRCoefficients , layers);
947816}
948817
// Builds a layer configuration from a list of hop sizes and applies it through configureLayers.
// Hops smaller than 64 do not get a layer of their own: their size is added to the direct-FIR count.
// Every other hop becomes one LayerSpec initialised with nextPowerOfTwo (hop).
// The change shown here only renames the first parameter (directTaps -> directCoefficients).
949- void PartitionedConvolver::setTypicalLayout (std::size_t directTaps , const std::vector<int >& hops)
818+ void PartitionedConvolver::setTypicalLayout (std::size_t directCoefficients , const std::vector<int >& hops)
950819{
951820 std::vector<LayerSpec> layerSpecs;
// Upper bound on the layer count: at most one spec per hop.
952821 layerSpecs.reserve (hops.size ());
953822
954823 for (int hop : hops)
955824 {
// NOTE(review): zero and negative hops also take this branch; a negative value wraps to a huge std::size_t in the cast below - confirm callers only pass positive hops.
956825 if (hop < 64 )
957- directTaps += static_cast <std::size_t > (hop);
826+ directCoefficients += static_cast <std::size_t > (hop);
958827 else
959828 layerSpecs.push_back ({ nextPowerOfTwo (hop) });
960829 }
961830
962- configureLayers (directTaps , layerSpecs);
831+ configureLayers (directCoefficients , layerSpecs);
963832}
964833
965834void PartitionedConvolver::setImpulseResponse (const float * impulseResponse, std::size_t length, const IRLoadOptions& options)
@@ -972,6 +841,11 @@ void PartitionedConvolver::setImpulseResponse (const std::vector<float>& impulse
972841 setImpulseResponse (impulseResponse.data (), impulseResponse.size (), options);
973842}
974843
// Public accessor added by this change: returns whatever the implementation object's getImpulseLength () reports.
844+ std::size_t PartitionedConvolver::getImpulseLength () const
845+ {
846+ return pImpl->getImpulseLength ();
847+ }
848+
975849void PartitionedConvolver::prepare (std::size_t maxBlockSize)
976850{
977851 pImpl->prepare (maxBlockSize);
0 commit comments