Fix AVX512DQ.

degasus · degasus · commit ed0f194b4242 · 2025-07-21T17:38:10.000+02:00
This file was never included anywhere, so it was effectively disabled, never tested, and broken.

This patch fixes:
* Missing include of xsimd_avx512dq.hpp in xsimd_isa.hpp
* Typo in the include guard
* Mismatched parameter name in fast_cast (declared as `x`, used as `self`)
* Arch requirements: `requires_arch<avx512f>` changed to `requires_arch<avx512dq>`
diff --git a/include/xsimd/arch/xsimd_avx512dq.hpp b/include/xsimd/arch/xsimd_avx512dq.hpp
@@ -9,8 +9,8 @@
  * The full license is in the file LICENSE, distributed with this software. *
  ****************************************************************************/
 
-#ifndef XSIMD_AVX512_DQHPP
-#define XSIMD_AVX512_D_HPP
+#ifndef XSIMD_AVX512DQ_HPP
+#define XSIMD_AVX512DQ_HPP
 
 #include "../types/xsimd_avx512dq_register.hpp"
 
@@ -47,12 +47,12 @@ namespace xsimd
 
         // bitwise_not
         template <class A>
-        XSIMD_INLINE batch<float, A> bitwise_not(batch<float, A> const& self, requires_arch<avx512f>) noexcept
+        XSIMD_INLINE batch<float, A> bitwise_not(batch<float, A> const& self, requires_arch<avx512dq>) noexcept
         {
             return _mm512_xor_ps(self, _mm512_castsi512_ps(_mm512_set1_epi32(-1)));
         }
         template <class A>
-        XSIMD_INLINE batch<double, A> bitwise_not(batch<double, A> const& self, requires_arch<avx512f>) noexcept
+        XSIMD_INLINE batch<double, A> bitwise_not(batch<double, A> const& self, requires_arch<avx512dq>) noexcept
         {
             return _mm512_xor_pd(self, _mm512_castsi512_pd(_mm512_set1_epi32(-1)));
         }
@@ -96,7 +96,7 @@ namespace xsimd
             // tmp1 = [a0..8, b0..8]
             // tmp2 = [a8..f, b8..f]
 #define XSIMD_AVX512_HADDP_STEP1(I, a, b)                                \
-    batch<float, avx512f> res##I;                                        \
+    batch<float, avx512dq> res##I;                                        \
     {                                                                    \
         auto tmp1 = _mm512_shuffle_f32x4(a, b, _MM_SHUFFLE(1, 0, 1, 0)); \
         auto tmp2 = _mm512_shuffle_f32x4(a, b, _MM_SHUFFLE(3, 2, 3, 2)); \
@@ -180,7 +180,7 @@ namespace xsimd
 
         // reduce_add
         template <class A>
-        XSIMD_INLINE float reduce_add(batch<float, A> const& rhs, requires_arch<avx512f>) noexcept
+        XSIMD_INLINE float reduce_add(batch<float, A> const& rhs, requires_arch<avx512dq>) noexcept
         {
             __m256 tmp1 = _mm512_extractf32x8_ps(rhs, 1);
             __m256 tmp2 = _mm512_extractf32x8_ps(rhs, 0);
@@ -192,7 +192,7 @@ namespace xsimd
         namespace detail
         {
             template <class A>
-            XSIMD_INLINE batch<double, A> fast_cast(batch<int64_t, A> const& x, batch<double, A> const&, requires_arch<avx512dq>) noexcept
+            XSIMD_INLINE batch<double, A> fast_cast(batch<int64_t, A> const& self, batch<double, A> const&, requires_arch<avx512dq>) noexcept
             {
                 return _mm512_cvtepi64_pd(self);
             }
diff --git a/include/xsimd/arch/xsimd_isa.hpp b/include/xsimd/arch/xsimd_isa.hpp
@@ -72,6 +72,10 @@
 #include "./xsimd_avx512f.hpp"
 #endif
 
+#if XSIMD_WITH_AVX512DQ
+#include "./xsimd_avx512dq.hpp"
+#endif
+
 #if XSIMD_WITH_AVX512BW
 #include "./xsimd_avx512bw.hpp"
 #endif

Original file line number	Diff line number	Diff line change
`@@ -9,8 +9,8 @@`
`9`	`9`	`* The full license is in the file LICENSE, distributed with this software. *`
`10`	`10`	`****************************************************************************/`
`11`	`11`
`12`		`-#ifndef XSIMD_AVX512_DQHPP`
`13`		`-#define XSIMD_AVX512_D_HPP`
	`12`	`+#ifndef XSIMD_AVX512DQ_HPP`
	`13`	`+#define XSIMD_AVX512DQ_HPP`
`14`	`14`
`15`	`15`	`#include "../types/xsimd_avx512dq_register.hpp"`
`16`	`16`
`@@ -47,12 +47,12 @@ namespace xsimd`
`47`	`47`
`48`	`48`	`// bitwise_not`
`49`	`49`	`template <class A>`
`50`		`- XSIMD_INLINE batch<float, A> bitwise_not(batch<float, A> const& self, requires_arch<avx512f>) noexcept`
	`50`	`+ XSIMD_INLINE batch<float, A> bitwise_not(batch<float, A> const& self, requires_arch<avx512dq>) noexcept`
`51`	`51`	`{`
`52`	`52`	`return _mm512_xor_ps(self, _mm512_castsi512_ps(_mm512_set1_epi32(-1)));`
`53`	`53`	`}`
`54`	`54`	`template <class A>`
`55`		`- XSIMD_INLINE batch<double, A> bitwise_not(batch<double, A> const& self, requires_arch<avx512f>) noexcept`
	`55`	`+ XSIMD_INLINE batch<double, A> bitwise_not(batch<double, A> const& self, requires_arch<avx512dq>) noexcept`
`56`	`56`	`{`
`57`	`57`	`return _mm512_xor_pd(self, _mm512_castsi512_pd(_mm512_set1_epi32(-1)));`
`58`	`58`	`}`
`@@ -96,7 +96,7 @@ namespace xsimd`
`96`	`96`	`// tmp1 = [a0..8, b0..8]`
`97`	`97`	`// tmp2 = [a8..f, b8..f]`
`98`	`98`	`#define XSIMD_AVX512_HADDP_STEP1(I, a, b) \`
`99`		`- batch<float, avx512f> res##I; \`
	`99`	`+ batch<float, avx512dq> res##I; \`
`100`	`100`	`{ \`
`101`	`101`	`auto tmp1 = _mm512_shuffle_f32x4(a, b, _MM_SHUFFLE(1, 0, 1, 0)); \`
`102`	`102`	`auto tmp2 = _mm512_shuffle_f32x4(a, b, _MM_SHUFFLE(3, 2, 3, 2)); \`
`@@ -180,7 +180,7 @@ namespace xsimd`
`180`	`180`
`181`	`181`	`// reduce_add`
`182`	`182`	`template <class A>`
`183`		`- XSIMD_INLINE float reduce_add(batch<float, A> const& rhs, requires_arch<avx512f>) noexcept`
	`183`	`+ XSIMD_INLINE float reduce_add(batch<float, A> const& rhs, requires_arch<avx512dq>) noexcept`
`184`	`184`	`{`
`185`	`185`	`__m256 tmp1 = _mm512_extractf32x8_ps(rhs, 1);`
`186`	`186`	`__m256 tmp2 = _mm512_extractf32x8_ps(rhs, 0);`
`@@ -192,7 +192,7 @@ namespace xsimd`
`192`	`192`	`namespace detail`
`193`	`193`	`{`
`194`	`194`	`template <class A>`
`195`		`- XSIMD_INLINE batch<double, A> fast_cast(batch<int64_t, A> const& x, batch<double, A> const&, requires_arch<avx512dq>) noexcept`
	`195`	`+ XSIMD_INLINE batch<double, A> fast_cast(batch<int64_t, A> const& self, batch<double, A> const&, requires_arch<avx512dq>) noexcept`
`196`	`196`	`{`
`197`	`197`	`return _mm512_cvtepi64_pd(self);`
`198`	`198`	`}`