Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion simde/x86/avx512/conflict.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
#define SIMDE_X86_AVX512_CONFLICT_H

#include "types.h"
#include "mov_mask.h"
#include "mov.h"
#include "cmpeq.h"
#include "set1.h"
#include "setzero.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
Expand Down
1 change: 1 addition & 0 deletions simde/x86/avx512/cvt.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#include "types.h"
#include "mov.h"
#include "setzero.h"
#include "../../simde-f16.h"

HEDLEY_DIAGNOSTIC_PUSH
Expand Down
1 change: 1 addition & 0 deletions simde/x86/avx512/cvts.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "types.h"
#include "mov.h"
#include "storeu.h"
#include "setzero.h"
#include "loadu.h"

HEDLEY_DIAGNOSTIC_PUSH
Expand Down
1 change: 1 addition & 0 deletions simde/x86/avx512/gather.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "types.h"
#include "../avx2.h"
#include "extract.h"
#include "setzero.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
Expand Down
1 change: 1 addition & 0 deletions simde/x86/avx512/insert.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#include "types.h"
#include "mov.h"
#include "setzero.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
Expand Down
1 change: 1 addition & 0 deletions simde/x86/avx512/movm.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "types.h"
#include "../avx2.h"
#include "set.h"
#include "setzero.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
Expand Down
1 change: 1 addition & 0 deletions simde/x86/avx512/permutex.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#define SIMDE_X86_AVX512_PERMUTEX_H

#include "types.h"
#include "setzero.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
Expand Down
1 change: 1 addition & 0 deletions simde/x86/avx512/round.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define SIMDE_X86_AVX512_ROUND_H

#include "types.h"
#include "setzero.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
Expand Down
1 change: 1 addition & 0 deletions simde/x86/avx512/roundscale.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "mul.h"
#include "round.h"
#include "cmpeq.h"
#include "setzero.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
Expand Down
38 changes: 19 additions & 19 deletions simde/x86/avx512/storeu.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

#include "types.h"
#include "mov.h"
#include "setzero.h"
#include "loadu.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
Expand Down Expand Up @@ -58,8 +58,8 @@ simde_mm256_mask_storeu_epi8 (void * mem_addr, simde__mmask32 k, simde__m256i a)
#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
_mm256_mask_storeu_epi8(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
#else
const simde__m256i zero = simde_mm256_setzero_si256();
simde_mm256_storeu_epi8(mem_addr, simde_mm256_mask_mov_epi8(zero, k, a));
const simde__m256i src = simde_mm256_loadu_epi8(mem_addr);
simde_mm256_storeu_epi8(mem_addr, simde_mm256_mask_mov_epi8(src, k, a));
#endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
Expand All @@ -73,8 +73,8 @@ simde_mm256_mask_storeu_epi16 (void * mem_addr, simde__mmask16 k, simde__m256i a
#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
_mm256_mask_storeu_epi16(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
#else
const simde__m256i zero = simde_mm256_setzero_si256();
simde_mm256_storeu_epi16(mem_addr, simde_mm256_mask_mov_epi16(zero, k, a));
const simde__m256i src = simde_mm256_loadu_epi16(mem_addr);
simde_mm256_storeu_epi16(mem_addr, simde_mm256_mask_mov_epi16(src, k, a));
#endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
Expand All @@ -88,8 +88,8 @@ simde_mm256_mask_storeu_epi32 (void * mem_addr, simde__mmask8 k, simde__m256i a)
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
_mm256_mask_storeu_epi32(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
#else
const simde__m256i zero = simde_mm256_setzero_si256();
simde_mm256_storeu_epi32(mem_addr, simde_mm256_mask_mov_epi32(zero, k, a));
const simde__m256i src = simde_mm256_loadu_epi32(mem_addr);
simde_mm256_storeu_epi32(mem_addr, simde_mm256_mask_mov_epi32(src, k, a));
#endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
Expand All @@ -103,8 +103,8 @@ simde_mm256_mask_storeu_epi64 (void * mem_addr, simde__mmask8 k, simde__m256i a)
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
_mm256_mask_storeu_epi64(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
#else
const simde__m256i zero = simde_mm256_setzero_si256();
simde_mm256_storeu_epi64(mem_addr, simde_mm256_mask_mov_epi64(zero, k, a));
const simde__m256i src = simde_mm256_loadu_epi64(mem_addr);
simde_mm256_storeu_epi64(mem_addr, simde_mm256_mask_mov_epi64(src, k, a));
#endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
Expand Down Expand Up @@ -188,8 +188,8 @@ simde_mm512_mask_storeu_epi16 (void * mem_addr, simde__mmask32 k, simde__m512i a
#if defined(SIMDE_X86_AVX512BW_NATIVE)
_mm512_mask_storeu_epi16(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
#else
const simde__m512i zero = simde_mm512_setzero_si512();
simde_mm512_storeu_epi16(mem_addr, simde_mm512_mask_mov_epi16(zero, k, a));
const simde__m512i src = simde_mm512_loadu_epi16(mem_addr);
simde_mm512_storeu_epi16(mem_addr, simde_mm512_mask_mov_epi16(src, k, a));
#endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
Expand All @@ -203,8 +203,8 @@ simde_mm512_mask_storeu_epi32 (void * mem_addr, simde__mmask16 k, simde__m512i a
#if defined(SIMDE_X86_AVX512F_NATIVE)
_mm512_mask_storeu_epi32(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
#else
const simde__m512i zero = simde_mm512_setzero_si512();
simde_mm512_storeu_epi32(mem_addr, simde_mm512_mask_mov_epi32(zero, k, a));
const simde__m512i src = simde_mm512_loadu_epi32(mem_addr);
simde_mm512_storeu_epi32(mem_addr, simde_mm512_mask_mov_epi32(src, k, a));
#endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
Expand All @@ -218,8 +218,8 @@ simde_mm512_mask_storeu_epi64 (void * mem_addr, simde__mmask8 k, simde__m512i a)
#if defined(SIMDE_X86_AVX512F_NATIVE)
_mm512_mask_storeu_epi64(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
#else
const simde__m512i zero = simde_mm512_setzero_si512();
simde_mm512_storeu_epi64(mem_addr, simde_mm512_mask_mov_epi64(zero, k, a));
const simde__m512i src = simde_mm512_loadu_epi64(mem_addr);
simde_mm512_storeu_epi64(mem_addr, simde_mm512_mask_mov_epi64(src, k, a));
#endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
Expand All @@ -233,8 +233,8 @@ simde_mm512_mask_storeu_ps (void * mem_addr, simde__mmask16 k, simde__m512 a) {
#if defined(SIMDE_X86_AVX512F_NATIVE)
_mm512_mask_storeu_ps(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
#else
const simde__m512 zero = simde_mm512_setzero_ps();
simde_mm512_storeu_ps(mem_addr, simde_mm512_mask_mov_ps(zero, k, a));
const simde__m512 src = simde_mm512_loadu_ps(mem_addr);
simde_mm512_storeu_ps(mem_addr, simde_mm512_mask_mov_ps(src, k, a));
#endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
Expand All @@ -248,8 +248,8 @@ simde_mm512_mask_storeu_pd (void * mem_addr, simde__mmask8 k, simde__m512d a) {
#if defined(SIMDE_X86_AVX512F_NATIVE)
_mm512_mask_storeu_pd(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
#else
const simde__m512d zero = simde_mm512_setzero_pd();
simde_mm512_storeu_pd(mem_addr, simde_mm512_mask_mov_pd(zero, k, a));
const simde__m512d src = simde_mm512_loadu_pd(mem_addr);
simde_mm512_storeu_pd(mem_addr, simde_mm512_mask_mov_pd(src, k, a));
#endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
Expand Down
132 changes: 67 additions & 65 deletions test/test.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,71 @@ simde_test_debug_printf_(const char* format, ...) {
#define SIMDE_TEST_STRUCT_MODIFIERS static const
#endif

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_

/* Test-suite equality check for single-precision values.
 *
 *   a     reference value; the tolerance window is centred on it
 *   b     value being checked against `a`
 *   slop  absolute tolerance; exactly 0.0 requests a byte-wise comparison
 *
 * Returns non-zero when the two values are considered equal, 0 otherwise:
 *   - a NaN `a` matches only a NaN `b`;
 *   - an infinite `a` matches only a `b` that is neither smaller nor
 *     larger than it and is not NaN;
 *   - with slop == 0 the two objects are compared with simde_memcmp;
 *   - otherwise `b` must lie in [a - slop, a + slop]. */
static int
simde_test_equal_f32(simde_float32 a, simde_float32 b, simde_float32 slop) {
  if (simde_math_isnan(a)) {
    return simde_math_isnan(b);
  } else if (simde_math_isinf(a)) {
    /* Every ordered comparison is false when b is NaN, so "neither less
     * nor greater" alone would report (inf, NaN) as equal.  Reject NaN
     * explicitly before applying the ordering test. */
    return !simde_math_isnan(b) && !((a < b) || (a > b));
  } else if (slop == SIMDE_FLOAT32_C(0.0)) {
    return !simde_memcmp(&a, &b, sizeof(simde_float32));
  } else {
    /* If slop is too small to move `a` at all, widen the bound to the
     * next representable value so the window is never degenerate. */
    simde_float32 lo = a - slop;
    if (HEDLEY_UNLIKELY(lo == a)) {
      lo = simde_math_nextafterf(a, -SIMDE_MATH_INFINITYF);
    }

    simde_float32 hi = a + slop;
    if (HEDLEY_UNLIKELY(hi == a)) {
      hi = simde_math_nextafterf(a, SIMDE_MATH_INFINITYF);
    }

    return ((b >= lo) && (b <= hi));
  }
}

/* Half-precision equality check: converts `a`, `b` and `slop` to
 * simde_float32 and defers to simde_test_equal_f32, returning its result. */
static int
simde_test_equal_f16(simde_float16 a, simde_float16 b, simde_float16 slop) {
  const simde_float32 a_wide = simde_float16_to_float32(a);
  const simde_float32 b_wide = simde_float16_to_float32(b);
  const simde_float32 slop_wide = simde_float16_to_float32(slop);

  return simde_test_equal_f32(a_wide, b_wide, slop_wide);
}

/* Test-suite equality check for double-precision values.
 *
 *   a     reference value; the tolerance window is centred on it
 *   b     value being checked against `a`
 *   slop  absolute tolerance; exactly 0.0 requests a byte-wise comparison
 *
 * Returns non-zero when the two values are considered equal, 0 otherwise:
 *   - a NaN `a` matches only a NaN `b`;
 *   - an infinite `a` matches only a `b` that is neither smaller nor
 *     larger than it and is not NaN;
 *   - with slop == 0 the two objects are compared with simde_memcmp;
 *   - otherwise `b` must lie in [a - slop, a + slop]. */
static int
simde_test_equal_f64(simde_float64 a, simde_float64 b, simde_float64 slop) {
  if (simde_math_isnan(a)) {
    return simde_math_isnan(b);
  } else if (simde_math_isinf(a)) {
    /* Every ordered comparison is false when b is NaN, so "neither less
     * nor greater" alone would report (inf, NaN) as equal.  Reject NaN
     * explicitly before applying the ordering test. */
    return !simde_math_isnan(b) && !((a < b) || (a > b));
  } else if (slop == SIMDE_FLOAT64_C(0.0)) {
    return !simde_memcmp(&a, &b, sizeof(simde_float64));
  } else {
    /* If slop is too small to move `a` at all, widen the bound to the
     * next representable value so the window is never degenerate. */
    simde_float64 lo = a - slop;
    if (HEDLEY_UNLIKELY(lo == a)) {
      lo = simde_math_nextafter(a, -SIMDE_MATH_INFINITY);
    }

    simde_float64 hi = a + slop;
    if (HEDLEY_UNLIKELY(hi == a)) {
      hi = simde_math_nextafter(a, SIMDE_MATH_INFINITY);
    }

    return ((b >= lo) && (b <= hi));
  }
}

/* bfloat16 equality check: converts `a`, `b` and `slop` to simde_float32
 * and defers to simde_test_equal_f32, returning its result. */
static int
simde_test_equal_bf16(simde_bfloat16 a, simde_bfloat16 b, simde_bfloat16 slop) {
  const simde_float32 a_wide = simde_bfloat16_to_float32(a);
  const simde_float32 b_wide = simde_bfloat16_to_float32(b);
  const simde_float32 slop_wide = simde_bfloat16_to_float32(slop);

  return simde_test_equal_f32(a_wide, b_wide, slop_wide);
}

HEDLEY_DIAGNOSTIC_POP

HEDLEY_PRINTF_FORMAT(3, 4)
static void
simde_test_codegen_snprintf_(char* str, size_t size, const char* format, ...) {
Expand Down Expand Up @@ -153,6 +218,8 @@ static void
simde_test_codegen_f32(size_t buf_len, char buf[HEDLEY_ARRAY_PARAM(buf_len)], simde_float32 value) {
if (simde_math_isnan(value)) {
simde_test_codegen_snprintf_(buf, buf_len, " SIMDE_MATH_NANF");
} else if (simde_test_equal_f32(value, SIMDE_MATH_FLT_MAX, simde_math_powf(SIMDE_FLOAT32_C(10.0), SIMDE_FLOAT32_C(-1.0)))) {
simde_test_codegen_snprintf_(buf, buf_len, " SIMDE_MATH_FLT_MAX");
} else if (simde_math_isinf(value)) {
simde_test_codegen_snprintf_(buf, buf_len, "%5cSIMDE_MATH_INFINITYF", value < 0 ? '-' : ' ');
} else {
Expand Down Expand Up @@ -734,71 +801,6 @@ SIMDE_TEST_CODEGEN_WRITE_SCALAR_FUNC_(simde_bfloat16, bf16)
/* Convenience wrappers around the *_full codegen writers: the argument
 * expression is stringified (#value) and passed as the second argument,
 * the value itself as the third, and SIMDE_TEST_VEC_POS_SINGLE as the
 * fourth.  NOTE(review): SIMDE_TEST_VEC_POS_SINGLE presumably marks a
 * lone value rather than an element of a list -- confirm against the
 * *_full implementations. */
#define simde_test_codegen_write_1p64(indent, value) simde_test_codegen_write_p64_full((indent), #value, (value), SIMDE_TEST_VEC_POS_SINGLE)
#define simde_test_codegen_write_1bf16(indent, value) simde_test_codegen_write_bf16_full((indent), #value, (value), SIMDE_TEST_VEC_POS_SINGLE)

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_

/* Test-suite equality check for single-precision values.
 *
 *   a     reference value; the tolerance window is centred on it
 *   b     value being checked against `a`
 *   slop  absolute tolerance; exactly 0.0 requests a byte-wise comparison
 *
 * Returns non-zero when the two values are considered equal, 0 otherwise:
 *   - a NaN `a` matches only a NaN `b`;
 *   - an infinite `a` matches only a `b` that is neither smaller nor
 *     larger than it and is not NaN;
 *   - with slop == 0 the two objects are compared with simde_memcmp;
 *   - otherwise `b` must lie in [a - slop, a + slop]. */
static int
simde_test_equal_f32(simde_float32 a, simde_float32 b, simde_float32 slop) {
  if (simde_math_isnan(a)) {
    return simde_math_isnan(b);
  } else if (simde_math_isinf(a)) {
    /* Every ordered comparison is false when b is NaN, so "neither less
     * nor greater" alone would report (inf, NaN) as equal.  Reject NaN
     * explicitly before applying the ordering test. */
    return !simde_math_isnan(b) && !((a < b) || (a > b));
  } else if (slop == SIMDE_FLOAT32_C(0.0)) {
    return !simde_memcmp(&a, &b, sizeof(simde_float32));
  } else {
    /* If slop is too small to move `a` at all, widen the bound to the
     * next representable value so the window is never degenerate. */
    simde_float32 lo = a - slop;
    if (HEDLEY_UNLIKELY(lo == a)) {
      lo = simde_math_nextafterf(a, -SIMDE_MATH_INFINITYF);
    }

    simde_float32 hi = a + slop;
    if (HEDLEY_UNLIKELY(hi == a)) {
      hi = simde_math_nextafterf(a, SIMDE_MATH_INFINITYF);
    }

    return ((b >= lo) && (b <= hi));
  }
}

/* Half-precision equality check: converts `a`, `b` and `slop` to
 * simde_float32 and defers to simde_test_equal_f32, returning its result. */
static int
simde_test_equal_f16(simde_float16 a, simde_float16 b, simde_float16 slop) {
  const simde_float32 a_wide = simde_float16_to_float32(a);
  const simde_float32 b_wide = simde_float16_to_float32(b);
  const simde_float32 slop_wide = simde_float16_to_float32(slop);

  return simde_test_equal_f32(a_wide, b_wide, slop_wide);
}

/* Test-suite equality check for double-precision values.
 *
 *   a     reference value; the tolerance window is centred on it
 *   b     value being checked against `a`
 *   slop  absolute tolerance; exactly 0.0 requests a byte-wise comparison
 *
 * Returns non-zero when the two values are considered equal, 0 otherwise:
 *   - a NaN `a` matches only a NaN `b`;
 *   - an infinite `a` matches only a `b` that is neither smaller nor
 *     larger than it and is not NaN;
 *   - with slop == 0 the two objects are compared with simde_memcmp;
 *   - otherwise `b` must lie in [a - slop, a + slop]. */
static int
simde_test_equal_f64(simde_float64 a, simde_float64 b, simde_float64 slop) {
  if (simde_math_isnan(a)) {
    return simde_math_isnan(b);
  } else if (simde_math_isinf(a)) {
    /* Every ordered comparison is false when b is NaN, so "neither less
     * nor greater" alone would report (inf, NaN) as equal.  Reject NaN
     * explicitly before applying the ordering test. */
    return !simde_math_isnan(b) && !((a < b) || (a > b));
  } else if (slop == SIMDE_FLOAT64_C(0.0)) {
    return !simde_memcmp(&a, &b, sizeof(simde_float64));
  } else {
    /* If slop is too small to move `a` at all, widen the bound to the
     * next representable value so the window is never degenerate. */
    simde_float64 lo = a - slop;
    if (HEDLEY_UNLIKELY(lo == a)) {
      lo = simde_math_nextafter(a, -SIMDE_MATH_INFINITY);
    }

    simde_float64 hi = a + slop;
    if (HEDLEY_UNLIKELY(hi == a)) {
      hi = simde_math_nextafter(a, SIMDE_MATH_INFINITY);
    }

    return ((b >= lo) && (b <= hi));
  }
}

/* bfloat16 equality check: converts `a`, `b` and `slop` to simde_float32
 * and defers to simde_test_equal_f32, returning its result. */
static int
simde_test_equal_bf16(simde_bfloat16 a, simde_bfloat16 b, simde_bfloat16 slop) {
  const simde_float32 a_wide = simde_bfloat16_to_float32(a);
  const simde_float32 b_wide = simde_bfloat16_to_float32(b);
  const simde_float32 slop_wide = simde_bfloat16_to_float32(slop);

  return simde_test_equal_f32(a_wide, b_wide, slop_wide);
}

HEDLEY_DIAGNOSTIC_POP

static simde_float16
simde_test_f16_precision_to_slop(int precision) {
return HEDLEY_UNLIKELY(precision == INT_MAX) ? SIMDE_FLOAT16_VALUE(0.0) : simde_float16_from_float32(simde_math_powf(SIMDE_FLOAT32_C(10.0), -HEDLEY_STATIC_CAST(float, precision)));
Expand Down
Loading
Loading