Skip to content

Commit c67330f

Browse files
committed
x86 avx512: enable 128h & 256h float16 types
1 parent e71f98f commit c67330f

File tree

8 files changed

+794
-6
lines changed

8 files changed

+794
-6
lines changed

simde/x86/avx512/load.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
*
2323
* Copyright:
2424
* 2020 Evan Nemerson <evan@nemerson.com>
25+
* 2025 Michael R. Crusoe <crusoe@debian.org>
2526
*/
2627

2728
#if !defined(SIMDE_X86_AVX512_LOAD_H)
@@ -33,6 +34,38 @@ HEDLEY_DIAGNOSTIC_PUSH
3334
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
3435
SIMDE_BEGIN_DECLS_
3536

37+
SIMDE_FUNCTION_ATTRIBUTES
38+
simde__m128h
39+
simde_mm_load_ph (void const * mem_addr) {
40+
#if defined(SIMDE_X86_AVX512FP16_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
41+
return _mm_load_ph(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128h));
42+
#else
43+
simde__m128h r;
44+
simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128h), sizeof(r));
45+
return r;
46+
#endif
47+
}
48+
#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
49+
#undef _mm_load_ph
50+
#define _mm_load_ph(a) simde_mm_load_ph(a)
51+
#endif
52+
53+
SIMDE_FUNCTION_ATTRIBUTES
54+
simde__m256h
55+
simde_mm256_load_ph (void const * mem_addr) {
56+
#if defined(SIMDE_X86_AVX512FP16_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
57+
return _mm256_load_ph(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256h));
58+
#else
59+
simde__m256h r;
60+
simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256h), sizeof(r));
61+
return r;
62+
#endif
63+
}
64+
#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
65+
#undef _mm256_load_ph
66+
#define _mm256_load_ph(a) simde_mm256_load_ph(a) /* alias the aligned 256-bit load defined above */
67+
#endif
68+
3669
SIMDE_FUNCTION_ATTRIBUTES
3770
simde__m512d
3871
simde_mm512_load_pd (void const * mem_addr) {

simde/x86/avx512/loadu.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
*
2323
* Copyright:
2424
* 2020 Evan Nemerson <evan@nemerson.com>
25+
* 2025 Michael R. Crusoe <crusoe@debian.org>
2526
*/
2627

2728
#if !defined(SIMDE_X86_AVX512_LOADU_H)
@@ -37,6 +38,38 @@ HEDLEY_DIAGNOSTIC_PUSH
3738
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
3839
SIMDE_BEGIN_DECLS_
3940

41+
SIMDE_FUNCTION_ATTRIBUTES
42+
simde__m128h
43+
simde_mm_loadu_ph (void const * mem_addr) {
44+
#if defined(SIMDE_X86_AVX512FP16_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
45+
return _mm_loadu_ph(mem_addr);
46+
#else
47+
simde__m128h r;
48+
simde_memcpy(&r, mem_addr, sizeof(r));
49+
return r;
50+
#endif
51+
}
52+
#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
53+
#undef _mm_loadu_ph
54+
#define _mm_loadu_ph(a) simde_mm_loadu_ph(a)
55+
#endif
56+
57+
SIMDE_FUNCTION_ATTRIBUTES
58+
simde__m256h
59+
simde_mm256_loadu_ph (void const * mem_addr) {
60+
#if defined(SIMDE_X86_AVX512FP16_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
61+
return _mm256_loadu_ph(mem_addr);
62+
#else
63+
simde__m256h r;
64+
simde_memcpy(&r, mem_addr, sizeof(r));
65+
return r;
66+
#endif
67+
}
68+
#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
69+
#undef _mm256_loadu_ph
70+
#define _mm256_loadu_ph(a) simde_mm256_loadu_ph(a)
71+
#endif
72+
4073
SIMDE_FUNCTION_ATTRIBUTES
4174
simde__m512
4275
simde_mm512_loadu_ps (void const * mem_addr) {

simde/x86/avx512/storeu.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,20 @@ simde_mm_mask_storeu_epi64 (void * mem_addr, simde__mmask8 k, simde__m128i a) {
113113
#define _mm_mask_storeu_epi64(mem_addr, k, a) simde_mm_mask_storeu_epi64(mem_addr, k, a)
114114
#endif
115115

116+
SIMDE_FUNCTION_ATTRIBUTES
117+
void
118+
simde_mm_storeu_ph (void * mem_addr, simde__m128h a) {
119+
#if defined(SIMDE_X86_AVX512FP16_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
120+
_mm_storeu_ph(mem_addr, a);
121+
#else
122+
simde_memcpy(mem_addr, &a, sizeof(a));
123+
#endif
124+
}
125+
#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
126+
#undef _mm_storeu_ph
127+
#define _mm_storeu_ph(mem_addr, a) simde_mm_storeu_ph(mem_addr, a)
128+
#endif
129+
116130
SIMDE_FUNCTION_ATTRIBUTES
117131
void
118132
simde_mm_mask_storeu_ps(void * mem_addr, simde__mmask8 k, simde__m128 a) {
@@ -160,6 +174,20 @@ simde_mm_mask_storeu_pd(void * mem_addr, simde__mmask8 k, simde__m128d a) {
160174
#define _mm256_storeu_epi64(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a)
161175
#endif
162176

177+
SIMDE_FUNCTION_ATTRIBUTES
178+
void
179+
simde_mm256_storeu_ph (void * mem_addr, simde__m256h a) {
180+
#if defined(SIMDE_X86_AVX512FP16_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
181+
_mm256_storeu_ph(mem_addr, a);
182+
#else
183+
simde_memcpy(mem_addr, &a, sizeof(a));
184+
#endif
185+
}
186+
#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
187+
#undef _mm256_storeu_ph
188+
#define _mm256_storeu_ph(mem_addr, a) simde_mm256_storeu_ph(mem_addr, a)
189+
#endif
190+
163191
SIMDE_FUNCTION_ATTRIBUTES
164192
void
165193
simde_mm256_mask_storeu_epi8 (void * mem_addr, simde__mmask32 k, simde__m256i a) {

simde/x86/avx512/types.h

Lines changed: 175 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,131 @@ SIMDE_BEGIN_DECLS_
5858
# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_64
5959
# endif
6060

61+
typedef union {
62+
#if defined(SIMDE_VECTOR_SUBSCRIPT)
63+
SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
64+
SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
65+
SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
66+
SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
67+
SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
68+
SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
69+
SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
70+
SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
71+
#if defined(SIMDE_HAVE_INT128_)
72+
SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
73+
SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
74+
#endif
75+
#if defined(SIMDE_FLOAT16_VECTOR)
76+
SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
77+
#else
78+
SIMDE_ALIGN_TO_16 simde_float16 f16[8];
79+
#endif
80+
SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
81+
SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
82+
SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
83+
SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
84+
#else
85+
SIMDE_ALIGN_TO_16 int8_t i8[16];
86+
SIMDE_ALIGN_TO_16 int16_t i16[8];
87+
SIMDE_ALIGN_TO_16 int32_t i32[4];
88+
SIMDE_ALIGN_TO_16 int64_t i64[2];
89+
SIMDE_ALIGN_TO_16 uint8_t u8[16];
90+
SIMDE_ALIGN_TO_16 uint16_t u16[8];
91+
SIMDE_ALIGN_TO_16 uint32_t u32[4];
92+
SIMDE_ALIGN_TO_16 uint64_t u64[2];
93+
#if defined(SIMDE_HAVE_INT128_)
94+
SIMDE_ALIGN_TO_16 simde_int128 i128[1];
95+
SIMDE_ALIGN_TO_16 simde_uint128 u128[1];
96+
#endif
97+
SIMDE_ALIGN_TO_16 simde_float16 f16[8];
98+
SIMDE_ALIGN_TO_16 simde_float32 f32[4];
99+
SIMDE_ALIGN_TO_16 simde_float64 f64[2];
100+
SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)];
101+
SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
102+
#endif
103+
104+
#if defined(SIMDE_X86_AVX512FP16_NATIVE)
105+
SIMDE_ALIGN_TO_16 __m128h n;
106+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
107+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8;
108+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16;
109+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32;
110+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8;
111+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16;
112+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32;
113+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
114+
#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
115+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
116+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
117+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
118+
#endif
119+
#endif
120+
} simde__m128h_private;
121+
122+
typedef union {
123+
#if defined(SIMDE_VECTOR_SUBSCRIPT)
124+
SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
125+
SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
126+
SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
127+
SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
128+
SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
129+
SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
130+
SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
131+
SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
132+
#if defined(SIMDE_HAVE_INT128_)
133+
SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
134+
SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
135+
#endif
136+
#if defined(SIMDE_FLOAT16_VECTOR)
137+
SIMDE_ALIGN_TO_16 simde_float16 f16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
138+
#else
139+
SIMDE_ALIGN_TO_32 simde_float16 f16[16];
140+
#endif
141+
SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
142+
SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
143+
SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
144+
SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
145+
#else
146+
SIMDE_ALIGN_TO_32 int8_t i8[32];
147+
SIMDE_ALIGN_TO_32 int16_t i16[16];
148+
SIMDE_ALIGN_TO_32 int32_t i32[8];
149+
SIMDE_ALIGN_TO_32 int64_t i64[4];
150+
SIMDE_ALIGN_TO_32 uint8_t u8[32];
151+
SIMDE_ALIGN_TO_32 uint16_t u16[16];
152+
SIMDE_ALIGN_TO_32 uint32_t u32[8];
153+
SIMDE_ALIGN_TO_32 uint64_t u64[4];
154+
#if defined(SIMDE_HAVE_INT128_)
155+
SIMDE_ALIGN_TO_32 simde_int128 i128[2];
156+
SIMDE_ALIGN_TO_32 simde_uint128 u128[2];
157+
#endif
158+
SIMDE_ALIGN_TO_32 simde_float16 f16[16];
159+
SIMDE_ALIGN_TO_32 simde_float32 f32[8];
160+
SIMDE_ALIGN_TO_32 simde_float64 f64[4];
161+
SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)];
162+
SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)];
163+
#endif
164+
165+
SIMDE_ALIGN_TO_32 simde__m128h_private m128h_private[2];
166+
// SIMDE_ALIGN_TO_32 simde__m128h m128h[2]; // left disabled: simde__m128h is only typedef'd further down, after the private unions
167+
168+
#if defined(SIMDE_X86_AVX512FP16_NATIVE)
169+
SIMDE_ALIGN_TO_32 __m256h n;
170+
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
171+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2];
172+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2];
173+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2];
174+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2];
175+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2];
176+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2];
177+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2];
178+
#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
179+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2];
180+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2];
181+
SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2];
182+
#endif
183+
#endif
184+
} simde__m256h_private;
185+
61186
typedef union {
62187
#if defined(SIMDE_VECTOR_SUBSCRIPT)
63188
SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
@@ -442,7 +567,6 @@ typedef union {
442567
#endif
443568
} simde__m512h_private;
444569

445-
446570
typedef union {
447571
#if defined(SIMDE_VECTOR_SUBSCRIPT)
448572
SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
@@ -569,10 +693,16 @@ typedef union {
569693

570694
#if defined(SIMDE_X86_AVX512FP16_NATIVE)
571695
typedef __m512h simde__m512h;
696+
typedef __m256h simde__m256h;
697+
typedef __m128h simde__m128h;
572698
#else
573699
#if defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_FLOAT16_VECTOR)
700+
typedef simde_float16 simde__m128h SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
701+
typedef simde_float16 simde__m256h SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS;
574702
typedef simde_float16 simde__m512h SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS;
575703
#else
704+
typedef simde__m128h_private simde__m128h;
705+
typedef simde__m256h_private simde__m256h;
576706
typedef simde__m512h_private simde__m512h;
577707
#endif
578708
#endif
@@ -647,18 +777,22 @@ typedef uint64_t simde__mmask64;
647777

648778
#if !defined(SIMDE_X86_AVX512FP16_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
649779
#if !defined(HEDLEY_INTEL_VERSION)
650-
//typedef simde__m128h __m128h;
651-
//typedef simde__m256h __m256h;
780+
typedef simde__m128h __m128h;
781+
typedef simde__m256h __m256h;
652782
typedef simde__m512h __m512h;
653783
#else
654-
//#define __m128h simde__m128h
655-
//#define __m256h simde__m256h
784+
#define __m128h simde__m128h
785+
#define __m256h simde__m256h
656786
#define __m512h simde__m512h
657787
#endif
658788
#endif
659789

790+
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128h), "simde__m128h size incorrect");
791+
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128h_private), "simde__m128h_private size incorrect");
660792
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128bh), "simde__m128bh size incorrect");
661793
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128bh_private), "simde__m128bh_private size incorrect");
794+
HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256h), "simde__m256h size incorrect");
795+
HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256h_private), "simde__m256h_private size incorrect");
662796
HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256bh), "simde__m256bh size incorrect");
663797
HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256bh_private), "simde__m256bh_private size incorrect");
664798
HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512bh), "simde__m512bh size incorrect");
@@ -672,8 +806,12 @@ HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d_private), "simde__m512d_private s
672806
HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512h), "simde__m512h size incorrect");
673807
HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512h_private), "simde__m512h_private size incorrect");
674808
#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
809+
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128h) == 16, "simde__m128h is not 16-byte aligned");
810+
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128h_private) == 16, "simde__m128h_private is not 16-byte aligned");
675811
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128bh) == 16, "simde__m128bh is not 16-byte aligned");
676812
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128bh_private) == 16, "simde__m128bh_private is not 16-byte aligned");
813+
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256h) == 32, "simde__m256h is not 32-byte aligned");
814+
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256h_private) == 32, "simde__m256h_private is not 32-byte aligned");
677815
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256bh) == 32, "simde__m256bh is not 32-byte aligned");
678816
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256bh_private) == 32, "simde__m256bh_private is not 32-byte aligned");
679817
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512bh) == 32, "simde__m512bh is not 32-byte aligned");
@@ -707,6 +845,22 @@ HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512h_private) == 32, "simde__m512h_p
707845
#define _MM_CMPINT_TRUE SIMDE_CMPINT_TRUE
708846
#endif
709847

848+
SIMDE_FUNCTION_ATTRIBUTES
849+
simde__m128h
850+
simde__m128h_from_private(simde__m128h_private v) {
851+
simde__m128h r;
852+
simde_memcpy(&r, &v, sizeof(r));
853+
return r;
854+
}
855+
856+
SIMDE_FUNCTION_ATTRIBUTES
857+
simde__m128h_private
858+
simde__m128h_to_private(simde__m128h v) {
859+
simde__m128h_private r;
860+
simde_memcpy(&r, &v, sizeof(r));
861+
return r;
862+
}
863+
710864
SIMDE_FUNCTION_ATTRIBUTES
711865
simde__m128bh
712866
simde__m128bh_from_private(simde__m128bh_private v) {
@@ -723,6 +877,22 @@ simde__m128bh_to_private(simde__m128bh v) {
723877
return r;
724878
}
725879

880+
SIMDE_FUNCTION_ATTRIBUTES
881+
simde__m256h
882+
simde__m256h_from_private(simde__m256h_private v) {
883+
simde__m256h r;
884+
simde_memcpy(&r, &v, sizeof(r));
885+
return r;
886+
}
887+
888+
SIMDE_FUNCTION_ATTRIBUTES
889+
simde__m256h_private
890+
simde__m256h_to_private(simde__m256h v) {
891+
simde__m256h_private r;
892+
simde_memcpy(&r, &v, sizeof(r));
893+
return r;
894+
}
895+
726896
SIMDE_FUNCTION_ATTRIBUTES
727897
simde__m256bh
728898
simde__m256bh_from_private(simde__m256bh_private v) {

0 commit comments

Comments
 (0)