Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,7 @@ jobs:
- target: tgl
- isax: -march=x86-64-v4 -mcx16 -mxsave -mpclmul -mfsgsbase -mrdrnd -mhle -mrdseed -maes -mclflushopt -mxsavec -mxsaves -msgx -mpku -msha -mrdpid -mavx512vpopcntdq -mavx512ifma -mavx512vbmi -mavx512vnni -mavx512vbmi2 -mavx512bitalg -mvpclmulqdq -mgfni -mvaes # icelake
target: icl
- isax: -march=x86-64-v4 -mcx16 -mxsave -mpclmul -mfsgsbase -mrdrnd -mhle -mrdseed -maes -mclflushopt -mxsavec -mxsaves -msgx -mpku -msha -mrdpid -mavx512vpopcntdq -mavx512ifma -mavx512vbmi -mavx512vnni -mavx512vbmi2 -mavx512bitalg -mvpclmulqdq -mgfni -mvaes -mpconfig -mwbnoinvd -mclwb -mmovdiri -mmovdir64b -menqcmd -mcldemote -mptwrite -mwaitpkg -mserialize -mtsxldtrk -muintr -mavxvnni -mavx512fp16 # sapphire rapids without bf16
# See https://github.com/simd-everywhere/simde/issues/1095
- isax: -march=x86-64-v4 -mcx16 -mxsave -mpclmul -mfsgsbase -mrdrnd -mhle -mrdseed -maes -mclflushopt -mxsavec -mxsaves -msgx -mpku -msha -mrdpid -mavx512vpopcntdq -mavx512ifma -mavx512vbmi -mavx512vnni -mavx512vbmi2 -mavx512bitalg -mvpclmulqdq -mgfni -mvaes -mpconfig -mwbnoinvd -mclwb -mmovdiri -mmovdir64b -menqcmd -mcldemote -mptwrite -mwaitpkg -mserialize -mtsxldtrk -muintr -mavxvnni -mavx512fp16 -mavx512bf16 # sapphire rapids
target: spr
env:
CFLAGS: -Wall -Wextra -Werror ${{ matrix.isax }}
Expand Down
16 changes: 16 additions & 0 deletions simde/x86/avx512/cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,22 @@ simde_mm512_castsi512_si256 (simde__m512i a) {
#define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a)
#endif

/* Reinterprets the bits of a simde__m512i as a simde__m512bh.
 *
 * No value conversion takes place: sizeof(simde__m512bh) bytes are copied
 * from the argument into the result with simde_memcpy.
 *
 * NOTE(review): the name reads as "pbh -> epu16", yet the signature goes
 * from simde__m512i to simde__m512bh. simde_mm512_castsi512_si256 in this
 * file names its input type first, so the order here looks reversed --
 * confirm the intended naming before relying on it. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512bh
simde_x_mm512_castpbh_epu16 (simde__m512i a) {
  simde__m512bh as_bh;

  simde_memcpy(&as_bh, &a, sizeof(as_bh));

  return as_bh;
}

/* Reinterprets the bits of a simde__m512bh as a simde__m512i.
 *
 * No value conversion takes place: sizeof(simde__m512i) bytes are copied
 * from the argument into the result with simde_memcpy.
 *
 * NOTE(review): the name reads as "epu16 -> pbh", yet the signature goes
 * from simde__m512bh to simde__m512i. simde_mm512_castsi512_si256 in this
 * file names its input type first, so the order here looks reversed --
 * confirm the intended naming before relying on it. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_x_mm512_castepu16_pbh (simde__m512bh a) {
  simde__m512i as_int;

  simde_memcpy(&as_int, &a, sizeof(as_int));

  return as_int;
}

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

Expand Down
4 changes: 2 additions & 2 deletions simde/x86/avx512/dpbf16.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ simde_mm512_dpbf16_ps (simde__m512 src, simde__m512bh a, simde__m512bh b) {
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_dpbf16_ps (simde__m512 src, simde__mmask16 k, simde__m512bh a, simde__m512bh b) {
#if defined(SIMDE_X86_AVX512BF16_NATIVE)
#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !(defined(__cplusplus) && defined(HEDLEY_GCC_VERSION))
return _mm512_mask_dpbf16_ps(src, k, a, b);
#else
return simde_mm512_mask_mov_ps(src, k, simde_mm512_dpbf16_ps(src, a, b));
Expand All @@ -264,7 +264,7 @@ simde_mm512_mask_dpbf16_ps (simde__m512 src, simde__mmask16 k, simde__m512bh a,
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_maskz_dpbf16_ps (simde__mmask16 k, simde__m512 src, simde__m512bh a, simde__m512bh b) {
#if defined(SIMDE_X86_AVX512BF16_NATIVE)
#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !(defined(__cplusplus) && defined(HEDLEY_GCC_VERSION))
return _mm512_maskz_dpbf16_ps(k, src, a, b);
#else
return simde_mm512_maskz_mov_ps(k, simde_mm512_dpbf16_ps(src, a, b));
Expand Down
6 changes: 6 additions & 0 deletions simde/x86/avx512/storeu.h
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,12 @@ simde_mm512_storeu_ph (void * mem_addr, simde__m512h a) {
#define _mm512_storeu_ph(mem_addr, a) simde_mm512_storeu_ph(mem_addr, a)
#endif

/* Stores a simde__m512bh vector to memory.
 *
 * mem_addr  destination; receives sizeof(simde__m512bh) bytes.
 * a         vector whose bytes are written out.
 *
 * The copy is performed with simde_memcpy, so the vector's object
 * representation is written unchanged. */
SIMDE_FUNCTION_ATTRIBUTES
void
simde_x_mm512_storeu_bh (void * mem_addr, simde__m512bh a) {
  simde_memcpy(mem_addr, &a, sizeof(simde__m512bh));
}

SIMDE_FUNCTION_ATTRIBUTES
void
simde_mm512_storeu_si512 (void * mem_addr, simde__m512i a) {
Expand Down
4 changes: 2 additions & 2 deletions test/test.h
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,7 @@ SIMDE_TEST_CODEGEN_GENERATE_RANDOM_INT_FUNC_(simde_poly64, p64)
} \
} \
\
char buf[53]; \
char buf[64]; \
simde_test_codegen_##symbol_identifier(sizeof(buf), buf, values[i]); \
fputs(buf, SIMDE_CODEGEN_FP); \
} \
Expand Down Expand Up @@ -746,7 +746,7 @@ SIMDE_TEST_CODEGEN_GENERATE_WRITE_VECTOR_FUNC_(simde_poly64, p64, 4)
} \
\
{ \
char buf[53]; \
char buf[64]; \
simde_test_codegen_##symbol_identifier(sizeof(buf), buf, value); \
fputs(buf, SIMDE_CODEGEN_FP); \
} \
Expand Down
879 changes: 434 additions & 445 deletions test/x86/avx512/dpbf16.c

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions test/x86/avx512/test-avx512.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m512i, 8, 64, simde_mm512_storeu_si51
SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m512i, 16, 32, simde_mm512_storeu_si512)
SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m512i, 32, 16, simde_mm512_storeu_si512)
SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m512i, 64, 8, simde_mm512_storeu_si512)
/* bfloat test helpers for simde__m512bh (names end in bf16x32);
 * simde_x_mm512_storeu_bh is passed as the store function. */
SIMDE_TEST_X86_GENERATE_BFLOAT_TYPE_FUNCS_(__m512bh, 16, 32, simde_x_mm512_storeu_bh)

#define SIMDE_TEST_X86_GENERATE_MASK_FUNCS_(EL) \
static simde__mmask##EL \
Expand Down
47 changes: 47 additions & 0 deletions test/x86/test-x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,53 @@ HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION
return simde_assert_close_vu##EL##_(sizeof(a_) / sizeof(a_[0]), a_, b_, slop, filename, line, astr, bstr); \
}

/* Generates the bfloat test helpers for one x86 vector type.
 *
 * Parameters:
 *   NT  suffix pasted onto `simde` to name the vector type
 *       (e.g. __m512bh -> simde__m512bh).
 *   EL  suffix pasted onto `simde_bfloat` to name the element type, and
 *       onto `bf` in the generated identifiers (presumably the element
 *       width in bits -- confirm against the callers).
 *   EC  element count; only used to build the generated function names
 *       (bf<EL>x<EC>). Array lengths are derived from sizeof instead.
 *   SF  store function, invoked as SF(dest_array, vector) to copy a
 *       vector into an array of elements.
 *
 * Functions emitted (all static):
 *   simde_test_x86_random_bf<EL>x<EC>(min, max)
 *       Fills an element array via simde_test_codegen_random_vbf<EL> and
 *       copies it into a vector with simde_memcpy.
 *   simde_test_x86_write_bf<EL>x<EC>(indent, value, pos)
 *       Stores the vector with SF and forwards the elements to
 *       simde_test_codegen_write_vbf<EL>.
 *   simde_test_x86_assert_equal_bf<EL>x<EC>_(a, b, slop, filename, line,
 *                                            astr, bstr)
 *       Stores both vectors with SF and returns the result of
 *       simde_assert_equal_vbf<EL>_ on the two element arrays.
 *   simde_test_x86_random_bf<EL>x<EC>_full(test_sets, vectors_per_set,
 *                                          values, min, max, type)
 *       Forwards to simde_test_codegen_random_vbf<EL>_full, supplying the
 *       per-vector element count.
 *   simde_test_x86_random_extract_bf<EL>x<EC>(set_num, vectors_per_set,
 *                                             vector_num, values)
 *       Copies one vector out of the flat `values` array, starting at
 *       element (set_num * elem_cnt * vectors_per_set) +
 *       (vector_num * elem_cnt), where elem_cnt is the number of elements
 *       per vector. */
#define SIMDE_TEST_X86_GENERATE_BFLOAT_TYPE_FUNCS_(NT, EL, EC, SF) \
static simde##NT \
simde_test_x86_random_bf##EL##x##EC(simde_bfloat##EL min, simde_bfloat##EL max) { \
simde_bfloat##EL values[sizeof(simde##NT) / sizeof(simde_bfloat##EL)]; \
simde_test_codegen_random_vbf##EL(sizeof(values) / sizeof(values[0]), values, min, max); \
simde##NT r; \
simde_memcpy(&r, values, sizeof(r)); \
return r; \
} \
\
static void \
simde_test_x86_write_bf##EL##x##EC(int indent, simde##NT value, SimdeTestVecPos pos) { \
simde_bfloat##EL values[sizeof(value) / sizeof(simde_bfloat##EL)]; \
SF(values, value); \
simde_test_codegen_write_vbf##EL(indent, sizeof(values) / sizeof(values[0]), values, pos); \
} \
\
static int \
simde_test_x86_assert_equal_bf##EL##x##EC##_(simde##NT a, simde##NT b, simde_bfloat##EL slop, \
const char* filename, int line, const char* astr, const char* bstr) { \
simde_bfloat##EL \
a_[sizeof(a) / sizeof(simde_bfloat##EL)], \
b_[sizeof(a) / sizeof(simde_bfloat##EL)]; \
\
SF(a_, a); \
SF(b_, b); \
\
return simde_assert_equal_vbf##EL##_(sizeof(a_) / sizeof(a_[0]), a_, b_, slop, filename, line, astr, bstr); \
} \
\
static void \
simde_test_x86_random_bf##EL##x##EC##_full( \
size_t test_sets, size_t vectors_per_set, \
simde_bfloat##EL values[HEDLEY_ARRAY_PARAM(test_sets * vectors_per_set * (sizeof(simde##NT) / sizeof(simde_bfloat##EL)))], \
simde_bfloat##EL min, simde_bfloat##EL max, SimdeTestVecFloatType type) { \
simde_test_codegen_random_vbf##EL##_full(test_sets, vectors_per_set, sizeof(simde##NT) / sizeof(simde_bfloat##EL), values, min, max, type); \
} \
\
static simde##NT \
simde_test_x86_random_extract_bf##EL##x##EC(size_t set_num, size_t vectors_per_set, size_t vector_num, simde_bfloat##EL* values) { \
const size_t elem_cnt = sizeof(simde##NT) / sizeof(simde_bfloat##EL); \
const size_t set_cnt = elem_cnt * vectors_per_set; \
simde##NT r; \
simde_memcpy(&r, &(values[(set_num * set_cnt) + (vector_num * elem_cnt)]), sizeof(r)); \
return r; \
}

/* For compatibility only. The `op` argument is accepted but dropped by the
 * expansion: each macro forwards only `a` and `b` to the matching
 * simde_test_x86_assert_equal_* helper, so the operator is assumed to be ==. */
#define simde_assert_m64_i8(a, op, b) simde_test_x86_assert_equal_i8x8(a, b)
#define simde_assert_m64_i16(a, op, b) simde_test_x86_assert_equal_i16x4(a, b)
Expand Down
Loading