Skip to content

Commit d9f616c

Browse files
author
Fikret Ardal
committed
fix avx512f + test it on vm
1 parent 701c8c8 commit d9f616c

File tree

2 files changed

+14
-23
lines changed

2 files changed

+14
-23
lines changed

c++/nda/simd/arch/AVX512/type.hpp

Lines changed: 5 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ namespace nda {
2424

2525
simd_type(const value_t *v, simd_aligned_memory) : value(_mm512_load_epi32(v)) {}
2626

27-
simd_type(const value_t *v, simd_unaligned_memory) : value(_mm512_load_epi64(v)) {}
27+
simd_type(const value_t *v, simd_unaligned_memory) : value(_mm512_loadu_epi64(v)) {}
2828

2929
simd_type(simd_zero_initialize) : value(_mm512_setzero_epi32()) {}
3030

@@ -55,7 +55,7 @@ namespace nda {
5555
const simd_i8 hi_2(_mm512_extracti64x4_epi64(rhs.value, 1));
5656
const simd_i8 lo = lo_1 / lo_2;
5757
const simd_i8 hi = hi_1 / hi_2;
58-
return simd_type{_mm512_inserti64x4(_mm512_castsi256_si512(lo.value), hi.value, 1)};
58+
return simd_type{_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1)};
5959
}
6060

6161
simd_type &operator+=(const simd_type &other) {
@@ -233,15 +233,6 @@ namespace nda {
233233
lhs.store(x.data());
234234
rhs.store(y.data());
235235
for (int i = 0; i < static_cast<int>(size()); i++) { x[i] = x[i] / y[i]; }
236-
return simd_type{x.data(), 0};
237-
}
238-
239-
simd_type operator/(const simd_type &other) const {
240-
alignas(alignment()) std::array<value_t, size()> x{};
241-
alignas(alignment()) std::array<value_t, size()> y{};
242-
this->store(x.data());
243-
other.store(y.data());
244-
for (int i = 0; i < size(); i++) { x[i] = x[i] / y[i]; }
245236
return simd_type{x.data(), simd_aligned_memory_t};
246237
}
247238

@@ -757,17 +748,17 @@ namespace nda {
757748

758749
explicit simd_type(const intrinsic_t &v) : value(v) {}
759750

760-
simd_type(const value_t *v, simd_aligned_memory) : value(_mm512_load_ps(reinterpret_cast<const scalar_t *>(v))) {}
751+
simd_type(const value_t *v, simd_aligned_memory) : value(_mm512_load_ps(v)) {}
761752

762753
simd_type(const scalar_t *v, simd_aligned_memory) : value(_mm512_load_ps(v)) {}
763754

764-
simd_type(const value_t *v, simd_unaligned_memory) : value(_mm512_loadu_ps(reinterpret_cast<const scalar_t *>(v))) {}
755+
simd_type(const value_t *v, simd_unaligned_memory) : value(_mm512_loadu_ps(v)) {}
765756

766757
simd_type(const scalar_t *v, simd_unaligned_memory) : value(_mm512_loadu_ps(v)) {}
767758

768759
simd_type(simd_zero_initialize) : value(_mm512_setzero_ps()) {}
769760

770-
simd_type(const std::array<value_t, 8> &v) : value(_mm512_loadu_ps(reinterpret_cast<const scalar_t *>(v.data()))) {}
761+
simd_type(const std::array<value_t, 8> &v) : value(_mm512_loadu_ps(v.data())) {}
771762

772763
simd_type(const std::array<scalar_t, 16> &v) : value(_mm512_loadu_ps(v.data())) {}
773764

test/c++/nda_simd.cpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -791,12 +791,12 @@ TEST(NDA, SimdDefaultConstructor) {
791791

792792
#ifdef __AVX512F__
793793
// AVX512 SIMD types
794-
simd_type_default_constructor<float, 16, abi_tag::AVX512>();
795-
simd_type_default_constructor<double, 8, abi_tag::AVX512>();
796-
simd_type_default_constructor<int32_t, 16, abi_tag::AVX512>();
797-
simd_type_default_constructor<int64_t, 8, abi_tag::AVX512>();
798-
simd_type_default_constructor<std::complex<float>, 8, abi_tag::AVX512>();
799-
simd_type_default_constructor<std::complex<double>, 4, abi_tag::AVX512>();
794+
simd_zero_initialization<float, 16, abi_tag::AVX512>();
795+
simd_zero_initialization<double, 8, abi_tag::AVX512>();
796+
simd_zero_initialization<int32_t, 16, abi_tag::AVX512>();
797+
simd_zero_initialization<int64_t, 8, abi_tag::AVX512>();
798+
simd_zero_initialization<std::complex<float>, 8, abi_tag::AVX512>();
799+
simd_zero_initialization<std::complex<double>, 4, abi_tag::AVX512>();
800800
#endif
801801
}
802802

@@ -1680,10 +1680,10 @@ TEST(NDA, SimdKernelTranspose) {
16801680

16811681
#ifdef __AVX512F__
16821682
// AVX512 SIMD types
1683-
// simd_kernel_transpose<float, 16, abi_tag::AVX512>();
1683+
simd_kernel_transpose<float, 16, abi_tag::AVX512>();
16841684
simd_kernel_transpose<double, 8, abi_tag::AVX512>();
1685-
// simd_kernel_transpose<int32_t, 16, abi_tag::AVX512>();
1686-
// simd_kernel_transpose<int64_t, 8, abi_tag::AVX512>();
1685+
simd_kernel_transpose<int32_t, 16, abi_tag::AVX512>();
1686+
simd_kernel_transpose<int64_t, 8, abi_tag::AVX512>();
16871687
simd_kernel_transpose<std::complex<float>, 8, abi_tag::AVX512>();
16881688
simd_kernel_transpose<std::complex<double>, 4, abi_tag::AVX512>();
16891689
#endif

0 commit comments

Comments
 (0)