Skip to content

Commit 27b816b

Browse files
authored
Merge pull request #10 from MeijisIrlnd/syl/simd-filter
SIMD Biquad
2 parents 1590c1c + f44cad6 commit 27b816b

File tree

8 files changed

+311
-1
lines changed

8 files changed

+311
-1
lines changed

CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ if (APPLE)
2525
"-fassociative-math"
2626
"-fno-math-errno"
2727
"-freciprocal-math"
28+
"-ftree-vectorize"
2829
)
2930
else ()
3031
if (WIN32)
@@ -75,6 +76,15 @@ FetchContent_Declare(
7576
)
7677
FetchContent_MakeAvailable(concurrentqueue)
7778

79+
# Disable xsimd unit tests
80+
set(BUILD_TESTS OFF)
81+
FetchContent_Declare(xsimd
82+
GIT_REPOSITORY https://github.com/xtensor-stack/xsimd.git
83+
GIT_TAG fb250213fd8c2db857fa0db05eea9b09fb1fe764
84+
GIT_SHALLOW ON
85+
)
86+
FetchContent_MakeAvailable(xsimd)
87+
7888
add_subdirectory(source)
7989
add_subdirectory(include)
8090
add_subdirectory(tests)
@@ -94,6 +104,7 @@ target_link_libraries(marvin PUBLIC
94104
${MARVIN_EXTRA_LINK_LIBS}
95105
readerwriterqueue
96106
concurrentqueue
107+
xsimd
97108
)
98109

99110
install(DIRECTORY "include/" # source directory

include/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ set(MARVIN_HEADERS
2222
${CMAKE_CURRENT_SOURCE_DIR}/marvin/dsp/filters/biquad/marvin_BiquadCoefficients.h
2323
${CMAKE_CURRENT_SOURCE_DIR}/marvin/dsp/filters/biquad/marvin_SmoothedBiquadCoefficients.h
2424
${CMAKE_CURRENT_SOURCE_DIR}/marvin/dsp/filters/biquad/marvin_Biquad.h
25+
${CMAKE_CURRENT_SOURCE_DIR}/marvin/dsp/filters/biquad/marvin_SIMDBiquad.h
2526
${CMAKE_CURRENT_SOURCE_DIR}/marvin/dsp/filters/biquad/marvin_RBJCoefficients.h
2627
${CMAKE_CURRENT_SOURCE_DIR}/marvin/dsp/oscillators/marvin_Oscillator.h
2728
${CMAKE_CURRENT_SOURCE_DIR}/marvin/library/marvin_Concepts.h

include/marvin/dsp/filters/biquad/marvin_Biquad.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,19 @@ namespace marvin::dsp::filters {
7373
// ));
7474
const auto [a0, a1, a2, b0, b1, b2] = m_coeffs[stage];
7575
auto& delay = m_delays[stage];
76+
const auto invB0 = static_cast<SampleType>(1.0) / b0;
77+
const auto a0x0 = a0 * x;
78+
const auto a1x1 = a1 * delay.x_z1;
79+
const auto a2x2 = a2 * delay.x_z2;
80+
const auto aSum = a0x0 + a1x1 + a2x2;
81+
const auto b1y1 = b1 * delay.y_z1;
82+
const auto b2y2 = b2 * delay.y_z2;
83+
const auto aMinusB1 = aSum - b1y1;
84+
const auto aMinusB2 = aMinusB1 - b2y2;
85+
const auto yOther = invB0 * aMinusB2;
86+
7687
const auto y = static_cast<SampleType>(1.0) / b0 * ((a0 * x) + (a1 * delay.x_z1) + (a2 * delay.x_z2) - (b1 * delay.y_z1) - (b2 * delay.y_z2));
88+
// assert(yOther == y);
7789
delay(x, y);
7890
x = y;
7991
}
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
//
2+
// Created by Syl Morrison on 03/05/2025.
3+
//
4+
5+
#ifndef MARVIN_SIMDBIQUAD_H
6+
#define MARVIN_SIMDBIQUAD_H
7+
#include <marvin/dsp/filters/biquad/marvin_BiquadCoefficients.h>
8+
#include <xsimd/xsimd.hpp>
9+
#include <marvin/library/marvin_Concepts.h>
10+
#include <marvin/math/marvin_VecOps.h>
11+
namespace marvin::dsp::filters {
12+
/**
13+
* \brief A SIMD optimised biquad, for running N biquads in parallel.
14+
*
15+
* From benchmarks, only gives a speedup in certain cases, and even in those cases, only ~100ns.
16+
* That being said, a robust parallel structure for filters is arguably nicer than a std::array<filter, N>.
17+
*
18+
* @tparam SampleType float or double
19+
* @tparam N The number of parallel biquads to process
20+
*/
21+
template <marvin::FloatType SampleType, size_t N>
22+
requires(N > 0)
23+
class SIMDBiquad final {
24+
public:
25+
/**
26+
* Constructor
27+
*/
28+
SIMDBiquad() {
29+
m_working.resize(N, 0.0);
30+
m_a0.resize(N, 0.0);
31+
m_a1.resize(N, 0.0);
32+
m_a2.resize(N, 0.0);
33+
m_b1.resize(N, 0.0);
34+
m_b2.resize(N, 0.0);
35+
m_x1.resize(N, 0.0);
36+
m_x2.resize(N, 0.0);
37+
m_y1.resize(N, 0.0);
38+
m_y2.resize(N, 0.0);
39+
}
40+
41+
/**
42+
* Sets the coefficients for all filters to the ones passed to the `coeffs` arg
43+
*
44+
* @param coeffs A BiquadCoefficients<SampleType> containing the coeffs you want to set.
45+
*/
46+
auto setCoeffs(BiquadCoefficients<SampleType> coeffs) noexcept -> void {
47+
m_equalCoeffs = true;
48+
const auto a0 = coeffs.a0 / coeffs.b0;
49+
const auto a1 = coeffs.a1 / coeffs.b0;
50+
const auto a2 = coeffs.a2 / coeffs.b0;
51+
const auto b1 = coeffs.b1 / coeffs.b0;
52+
const auto b2 = coeffs.b2 / coeffs.b0;
53+
54+
const auto a0Batch = xsimd::broadcast(a0);
55+
const auto a1Batch = xsimd::broadcast(a1);
56+
const auto a2Batch = xsimd::broadcast(a2);
57+
const auto b1Batch = xsimd::broadcast(b1);
58+
const auto b2Batch = xsimd::broadcast(b2);
59+
for (size_t i = 0; i < m_vecSize; i += m_simdSize) {
60+
a0Batch.store_aligned(&m_a0[i]);
61+
a1Batch.store_aligned(&m_a1[i]);
62+
a2Batch.store_aligned(&m_a2[i]);
63+
b1Batch.store_aligned(&m_b1[i]);
64+
b2Batch.store_aligned(&m_b2[i]);
65+
}
66+
for (size_t i = m_vecSize; i < N; ++i) {
67+
m_a0[i] = a0;
68+
m_a1[i] = a1;
69+
m_a2[i] = a2;
70+
m_b1[i] = b1;
71+
m_b2[i] = b2;
72+
}
73+
}
74+
75+
/**
76+
* Sets the coefficients for a specific biquad
77+
* @param index
78+
* @param coeffs
79+
*/
80+
auto setCoeffs(size_t index, BiquadCoefficients<SampleType> coeffs) noexcept -> void {
81+
m_equalCoeffs = false;
82+
const auto [a0, a1, a2, b0, b1, b2] = coeffs;
83+
m_a0[index] = a0 / b0;
84+
m_a1[index] = a1 / b0;
85+
m_a2[index] = a2 / b0;
86+
m_b1[index] = b1 / b0;
87+
m_b2[index] = b2 / b0;
88+
}
89+
90+
/**
91+
* Processes all samples in x through their respective biquads, and overwrites the values in x
92+
* @param x An array-like containing N samples to be filtered.
93+
*/
94+
auto operator()(std::span<SampleType, N> x) noexcept -> void {
95+
constexpr static auto sizeBytes = sizeof(SampleType) * N;
96+
std::memcpy(m_working.data(), x.data(), sizeBytes);
97+
for (size_t i = 0; i < m_vecSize; i += m_simdSize) {
98+
const auto& a0 = xsimd::load_aligned(&m_a0[i]);
99+
const auto& x0 = xsimd::load_aligned(&m_working[i]);
100+
const auto a0x0 = a0 * x0;
101+
const auto& a1 = xsimd::load_aligned(&m_a1[i]);
102+
const auto& x1 = xsimd::load_aligned(&m_x1[i]);
103+
const auto a1x1 = a1 * x1;
104+
const auto& a2 = xsimd::load_aligned(&m_a2[i]);
105+
const auto& x2 = xsimd::load_aligned(&m_x2[i]);
106+
const auto a2x2 = a2 * x2;
107+
const auto& b1 = xsimd::load_aligned(&m_b1[i]);
108+
const auto& y1 = xsimd::load_aligned(&m_y1[i]);
109+
const auto b1y1 = b1 * y1;
110+
const auto& b2 = xsimd::load_aligned(&m_b2[i]);
111+
const auto& y2 = xsimd::load_aligned(&m_y2[i]);
112+
const auto b2y2 = b2 * y2;
113+
const auto res = a0x0 + a1x1 + a2x2 - b1y1 - b2y2;
114+
x1.store_aligned(&m_x2[i]);
115+
x0.store_aligned(&m_x1[i]);
116+
y1.store_aligned(&m_y2[i]);
117+
res.store_aligned(&m_y1[i]);
118+
}
119+
120+
for (size_t i = m_vecSize; i < N; ++i) {
121+
const auto res = (m_a0[i] * m_working[i]) + (m_a1[i] * m_x1[i]) + (m_a2[i] * m_x2[i]) - (m_b1[i] * m_y1[i]) - (m_b2[i] * m_y2[i]);
122+
m_x2[i] = m_x1[i];
123+
m_x1[i] = m_working[i];
124+
m_y2[i] = m_y1[i];
125+
m_y1[i] = res;
126+
}
127+
std::memcpy(x.data(), m_y1.data(), sizeBytes);
128+
}
129+
130+
/**
131+
* Zeroes all internal state (except coefficients).
132+
*/
133+
auto reset() noexcept -> void {
134+
auto batch = xsimd::broadcast(0.0);
135+
for (size_t i = 0; i < m_vecSize; i += m_simdSize) {
136+
batch.store_aligned(&m_working[i]);
137+
batch.store_aligned(&m_x1[i]);
138+
batch.store_aligned(&m_x2[i]);
139+
batch.store_aligned(&m_y1[i]);
140+
batch.store_aligned(&m_y2[i]);
141+
}
142+
for (size_t i = m_vecSize; i < N; ++i) {
143+
m_working[i] = 0.0;
144+
m_x1[i] = 0.0;
145+
m_x2[i] = 0.0;
146+
m_y1[i] = 0.0;
147+
m_y2[i] = 0.0;
148+
}
149+
}
150+
151+
private:
152+
constexpr static auto m_simdSize = xsimd::simd_type<SampleType>::size;
153+
constexpr static auto m_vecSize = N - N % m_simdSize;
154+
bool m_equalCoeffs{ false };
155+
std::vector<SampleType, xsimd::aligned_allocator<SampleType>> m_working;
156+
std::vector<SampleType, xsimd::aligned_allocator<SampleType>> m_a0, m_a1, m_a2, m_b1, m_b2;
157+
std::vector<SampleType, xsimd::aligned_allocator<SampleType>> m_x1, m_x2, m_y1, m_y2;
158+
};
159+
160+
161+
} // namespace marvin::dsp::filters
162+
#endif // MARVIN_SIMDBIQUAD_H

source/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ set(MARVIN_SOURCES
2020
${CMAKE_CURRENT_SOURCE_DIR}/dsp/filters/marvin_APF.cpp
2121
${CMAKE_CURRENT_SOURCE_DIR}/dsp/filters/marvin_LPF.cpp
2222
${CMAKE_CURRENT_SOURCE_DIR}/dsp/filters/marvin_SVF.cpp
23+
${CMAKE_CURRENT_SOURCE_DIR}/dsp/filters/biquad/marvin_SIMDBiquad.cpp
2324
${CMAKE_CURRENT_SOURCE_DIR}/dsp/filters/biquad/marvin_Biquad.cpp
2425
${CMAKE_CURRENT_SOURCE_DIR}/dsp/filters/biquad/marvin_BiquadCoefficients.cpp
2526
${CMAKE_CURRENT_SOURCE_DIR}/dsp/filters/biquad/marvin_RBJCoefficients.cpp
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
//
2+
// Created by Syl Morrison on 10/05/2025.
3+
//
4+
#include <marvin/dsp/filters/biquad/marvin_SIMDBiquad.h>

tests/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ set(MARVIN_TEST_SOURCE
1111
${CMAKE_CURRENT_SOURCE_DIR}/dsp/filters/marvin_SVFTests.cpp
1212
${CMAKE_CURRENT_SOURCE_DIR}/dsp/filters/biquad/marvin_BiquadTests.cpp
1313
${CMAKE_CURRENT_SOURCE_DIR}/dsp/filters/biquad/marvin_SmoothedBiquadCoefficientsTests.cpp
14+
${CMAKE_CURRENT_SOURCE_DIR}/dsp/filters/biquad/marvin_SIMDBiquadTests.cpp
1415
${CMAKE_CURRENT_SOURCE_DIR}/library/marvin_ConceptsTests.cpp
1516
${CMAKE_CURRENT_SOURCE_DIR}/library/marvin_PropagateConstTests.cpp
1617
${CMAKE_CURRENT_SOURCE_DIR}/math/marvin_MathTests.cpp
@@ -23,7 +24,6 @@ set(MARVIN_TEST_SOURCE
2324
${CMAKE_CURRENT_SOURCE_DIR}/utils/marvin_SmoothedValueTests.cpp
2425
${CMAKE_CURRENT_SOURCE_DIR}/utils/marvin_RandomTests.cpp
2526
${CMAKE_CURRENT_SOURCE_DIR}/utils/marvin_FIFOTests.cpp
26-
# ${CMAKE_CURRENT_SOURCE_DIR}/utils/marvin_FormatReaderTests.cpp
2727
PARENT_SCOPE
2828
)
2929

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
//
2+
// Created by Syl Morrison on 03/05/2025.
3+
//
4+
#include "catch2/benchmark/catch_benchmark.hpp"
5+
6+
7+
#include <iostream>
8+
#include <marvin/dsp/filters/biquad/marvin_Biquad.h>
9+
#include <marvin/dsp/filters/biquad/marvin_RBJCoefficients.h>
10+
#include <marvin/dsp/filters/biquad/marvin_SIMDBiquad.h>
11+
#include <marvin/dsp/oscillators/marvin_Oscillator.h>
12+
#include <fmt/core.h>
13+
#include <catch2/catch_test_macros.hpp>
14+
#include <catch2/matchers/catch_matchers_floating_point.hpp>
15+
namespace marvin::testing {
16+
static std::random_device s_rd;
17+
template <marvin::FloatType SampleType>
18+
std::vector<SampleType> generateImpulse(size_t len) {
19+
std::vector<SampleType> impulse;
20+
impulse.resize(len);
21+
std::fill(impulse.begin(), impulse.end(), static_cast<SampleType>(0.0));
22+
impulse[0] = static_cast<SampleType>(1.0);
23+
return impulse;
24+
}
25+
template <FloatType T, size_t N>
26+
std::vector<T> generateNoise() {
27+
marvin::dsp::oscillators::NoiseOscillator<T> osc{ s_rd };
28+
std::vector<T> vec(N, static_cast<T>(0.0));
29+
for (auto i = 0; i < N; ++i) {
30+
vec[i] = osc();
31+
}
32+
return vec;
33+
}
34+
35+
TEST_CASE("Test parity with single biquad") {
36+
constexpr static auto sampleRate{ 44100.0 };
37+
constexpr static auto cutoff{ 200.0 };
38+
constexpr static auto q{ 0.7070 };
39+
marvin::dsp::filters::Biquad<double, 1> singleBiquad;
40+
marvin::dsp::filters::SIMDBiquad<double, 1> simdBiquad;
41+
auto lowpassCoeffs = dsp::filters::rbj::lowpass(sampleRate, cutoff, q);
42+
singleBiquad.setCoeffs(0, lowpassCoeffs);
43+
simdBiquad.setCoeffs(lowpassCoeffs);
44+
auto impulse = generateImpulse<double>(100);
45+
for (auto i = 0; i < impulse.size(); ++i) {
46+
const auto singleFiltered = singleBiquad(impulse[i]);
47+
std::array<double, 1> simdInput{ impulse[i] };
48+
simdBiquad(simdInput);
49+
std::cout << i;
50+
REQUIRE_THAT(simdInput[0], Catch::Matchers::WithinRel(singleFiltered, 0.1));
51+
}
52+
}
53+
54+
template <NumericType T>
55+
[[nodiscard]] std::string getTypeName() {
56+
if constexpr (std::is_same_v<T, float>) {
57+
return "float";
58+
} else if constexpr (std::is_same_v<T, double>) {
59+
return "double";
60+
}
61+
}
62+
63+
template <marvin::FloatType SampleType, size_t N, size_t NumSamples>
64+
auto benchmarkSIMD() -> void {
65+
constexpr static auto sampleRate{ 44100.0 };
66+
constexpr static auto cutoff{ 200.0 };
67+
constexpr static auto q{ 0.7070 };
68+
const auto coeffs = marvin::dsp::filters::rbj::lowpass<SampleType>(sampleRate, cutoff, q);
69+
std::array<marvin::dsp::filters::Biquad<SampleType, 1>, N> normalFilters;
70+
marvin::dsp::filters::SIMDBiquad<SampleType, N> simdFilters;
71+
for (auto& f : normalFilters) {
72+
f.setCoeffs(0, coeffs);
73+
}
74+
simdFilters.setCoeffs(coeffs);
75+
auto impulse = generateNoise<SampleType, NumSamples>();
76+
std::vector<std::array<SampleType, N>> simdInputs;
77+
for (auto& x : impulse) {
78+
std::array<SampleType, N> current;
79+
std::fill(current.begin(), current.end(), x);
80+
simdInputs.emplace_back(current);
81+
}
82+
BENCHMARK(fmt::format("Biquad, N = {}, NSamples = {}, Type = {}", N, NumSamples, getTypeName<SampleType>(), N)) {
83+
for (auto i = 0; i < impulse.size(); ++i) {
84+
const auto coeffs = marvin::dsp::filters::rbj::lowpass<SampleType>(sampleRate, cutoff + static_cast<SampleType>(i), q);
85+
86+
for (auto& f : normalFilters) {
87+
f.setCoeffs(0, coeffs);
88+
[[maybe_unused]] const auto _ = f(impulse[i]);
89+
}
90+
}
91+
};
92+
BENCHMARK(fmt::format("SIMDBiquad<{}>, NSamples = {}, Type = {}", N, NumSamples, getTypeName<SampleType>(), N)) {
93+
for (auto i = 0; i < impulse.size(); ++i) {
94+
const auto coeffs = marvin::dsp::filters::rbj::lowpass<SampleType>(sampleRate, cutoff + static_cast<SampleType>(i), q);
95+
simdFilters.setCoeffs(coeffs);
96+
simdFilters(simdInputs[i]);
97+
}
98+
};
99+
}
100+
101+
TEST_CASE("Benchmark Biquads") {
102+
benchmarkSIMD<float, 2, 32>();
103+
benchmarkSIMD<float, 3, 32>();
104+
benchmarkSIMD<float, 4, 32>();
105+
benchmarkSIMD<float, 5, 32>();
106+
benchmarkSIMD<float, 6, 32>();
107+
benchmarkSIMD<float, 7, 32>();
108+
benchmarkSIMD<float, 8, 32>();
109+
benchmarkSIMD<float, 9, 32>();
110+
benchmarkSIMD<float, 10, 32>();
111+
benchmarkSIMD<float, 11, 32>();
112+
benchmarkSIMD<float, 12, 32>();
113+
benchmarkSIMD<float, 13, 32>();
114+
benchmarkSIMD<float, 14, 32>();
115+
benchmarkSIMD<float, 15, 32>();
116+
benchmarkSIMD<float, 16, 32>();
117+
}
118+
119+
} // namespace marvin::testing

0 commit comments

Comments
 (0)