Skip to content

Commit 8234115

Browse files
committed
Add vectorized version of filter
1 parent fe28dca commit 8234115

File tree

5 files changed

+64
-3
lines changed

5 files changed

+64
-3
lines changed

cpp/CMakeLists.txt

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,14 @@ FetchContent_Declare(
3131
GIT_TAG e593f6695c6065e6b345fe2862f04a519ed484e0
3232
)
3333

34-
FetchContent_MakeAvailable(argparse mdspan cnpy spdlog)
34+
FetchContent_Declare(
35+
eve
36+
GIT_REPOSITORY https://github.com/jfalcou/eve.git
37+
GIT_TAG b2d8b637e71d132654c52480549e9b79944d1f74
38+
)
39+
40+
option(EVE_BUILD_TEST "Build EVE tests" OFF)
41+
FetchContent_MakeAvailable(argparse mdspan cnpy spdlog eve)
3542
target_include_directories(cnpy PUBLIC ${CNPY_SOURCE_DIR})
3643

3744
# ==========================
@@ -44,6 +51,7 @@ target_link_libraries(fftw-cpp INTERFACE mdspan)
4451
option(ENABLE_AVX512 "Enable AVX512 instructions (e.g. might want to turn off for the sake of Valgrind)" OFF)
4552
if (ENABLE_AVX512)
4653
add_compile_options("-mavx512f" "-march=native")
54+
add_compile_definitions(AVX512_ENABLED)
4755
endif ()
4856

4957
option(ENABLE_CILK "Enable Cilk parallelism" ON)
@@ -66,7 +74,8 @@ add_library(src-lib OBJECT lib/Naive.cpp lib/Naive.hpp lib/HermiteRunner.cpp lib
6674
lib/Brackets.hpp lib/Brackets.cpp
6775
)
6876
target_include_directories(src-lib PUBLIC lib/)
69-
target_link_libraries(src-lib mdspan fftw-cpp cnpy spdlog::spdlog)
77+
# TODO give eve only to vectorized targets
78+
target_link_libraries(src-lib mdspan fftw-cpp cnpy spdlog::spdlog eve::eve)
7079

7180
add_subdirectory(bench)
7281

cpp/bench/hl-filter.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,7 @@ BENCHMARK(BM_HouLiFilter<HouLiFilterCached>)
3535
BENCHMARK(BM_HouLiFilter<HouLiFilterCached1D>)
3636
->ArgsProduct({{2048, 4096, 8192}, {2048, 4096, 8192}})
3737
->Unit(benchmark::kMillisecond);
38+
39+
BENCHMARK(BM_HouLiFilter<HouLiFilterCached1DVector>)
40+
->ArgsProduct({{2048, 4096, 8192}, {2048, 4096, 8192}})
41+
->Unit(benchmark::kMillisecond);

cpp/lib/Filter.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
//
44

55
#include "Filter.hpp"
6+
7+
#include <eve/module/core.hpp>
8+
69
namespace ahr {
710
void HouLiFilter::operator()(Grid::View::C_XY view) const {
811
grid.for_each_kxky([&](Dim kx, Dim ky) {
@@ -39,4 +42,33 @@ void HouLiFilterCached1D::operator()(Grid::View::C_XY view) const {
3942
view(kx, ky) *= factors_x[kx] * factors_y[ky];
4043
});
4144
}
45+
46+
/// Builds the vectorized filter on top of the cached 1D filter.
///
/// In addition to the base-class state, this fills `factors_x_duped`, a
/// buffer of length 2 * grid.KX in which every x-factor appears twice in a
/// row: [fx0, fx0, fx1, fx1, ...].
///
/// @param grid  Grid whose KX/KY extents size the cached factor tables.
HouLiFilterCached1DVector::HouLiFilterCached1DVector(Grid const &grid)
    : HouLiFilterCached1D(grid), factors_x_duped(2 * grid.KX) {
    // NOTE(review): operator() strides along kx and has a scalar tail, so this
    // KY divisibility requirement looks unrelated to the kernel — confirm.
    assert(grid.KY % C_WIDTH == 0);

    Dim out = 0;
    for (Dim kx = 0; kx < grid.KX; ++kx) {
        Real const fx = factors_x[kx];
        // Write the same factor into two consecutive slots.
        factors_x_duped[out++] = fx;
        factors_x_duped[out++] = fx;
    }
}
54+
55+
/// Scales `view` in place: every element view(kx, ky) is multiplied by
/// factors_x[kx] and factors_y[ky].
///
/// Each row (fixed ky) is processed C_WIDTH elements at a time through
/// `VReal` registers; the remaining elements of the row are handled by a
/// scalar tail loop.
///
/// NOTE(review): the vector path assumes that view(kx, ky) ..
/// view(kx + C_WIDTH - 1, ky) are adjacent in memory and that each element is
/// laid out as two consecutive `Real`s — confirm against the layout of
/// Grid::View::C_XY.
///
/// @param view  Mutable 2D view that is filtered in place.
void HouLiFilterCached1DVector::operator()(Grid::View::C_XY view) const {
    // Elements consumed per vector iteration, expressed in the index type so
    // the loop bound below involves no signed/unsigned mixing.
    constexpr Dim step = static_cast<Dim>(C_WIDTH);

    for (Dim ky = 0; ky < grid.KY; ++ky) {
        // The y-factor is constant along the row: broadcast it once.
        VReal const vfy{factors_y[ky]};

        Dim kx = 0;
        // Written as `kx + step <= KX` rather than `kx <= KX - step`: the
        // latter wraps around for an unsigned index type when KX < step and
        // would then read and write past the end of the row.
        for (; kx + step <= grid.KX; kx += step) {
            Real *view_addr = reinterpret_cast<Real *>(&view(kx, ky));
            VReal const input{view_addr};

            // factors_x_duped holds each x-factor twice, so one load lines up
            // with the two Reals of every element in `input`.
            VReal const vfx{&factors_x_duped[kx * 2]};

            eve::store(input * vfx * vfy, view_addr);
        }

        // tail: elements that do not fill a whole vector
        for (; kx < grid.KX; ++kx) {
            view(kx, ky) *= factors_x[kx] * factors_y[ky];
        }
    }
}
4274
} // namespace ahr

cpp/lib/Filter.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22
#include "constants.hpp"
33
#include "grid.hpp"
4+
#include <eve/wide.hpp>
45

56
namespace ahr {
67

@@ -33,4 +34,19 @@ class HouLiFilterCached1D : protected HouLiFilter {
3334
protected:
3435
std::vector<Real> factors_x, factors_y;
3536
};
37+
38+
class HouLiFilterCached1DVector : HouLiFilterCached1D {
39+
public:
40+
explicit HouLiFilterCached1DVector(Grid const &grid);
41+
void operator()(Grid::View::C_XY view) const;
42+
43+
private:
44+
using VReal = eve::wide<Real>;
45+
static auto constexpr R_WIDTH = VReal::size();
46+
static auto constexpr C_WIDTH = VReal::size() / 2;
47+
48+
/// A pre-expanded vector of 2d factors
49+
std::vector<Real> factors_x_duped;
50+
};
51+
3652
} // namespace ahr

cpp/test/filter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ template <typename TestedFilter> class TestFilter : public ::testing::Test {
1414
TestedFilter filter_t{grid};
1515
};
1616

17-
using Types = ::testing::Types<HouLiFilterCached, HouLiFilterCached1D>;
17+
using Types = ::testing::Types<HouLiFilterCached, HouLiFilterCached1D, HouLiFilterCached1DVector>;
1818
TYPED_TEST_SUITE(TestFilter, Types);
1919

2020
TYPED_TEST(TestFilter, Filter) {

0 commit comments

Comments
 (0)