Skip to content

Commit e71f98f

Browse files
committed
x86 avx512 storeu: implement remaining intrinsics
1 parent afc2140 commit e71f98f

File tree

2 files changed

+1111
-1
lines changed

2 files changed

+1111
-1
lines changed

simde/x86/avx512/storeu.h

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,114 @@ HEDLEY_DIAGNOSTIC_PUSH
3535
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
3636
SIMDE_BEGIN_DECLS_
3737

38+
/* Element-typed unaligned 128-bit integer stores.  All four spellings
 * forward unchanged to simde_mm_storeu_si128. */
#define simde_mm_storeu_epi8(mem_addr, a) simde_mm_storeu_si128(mem_addr, a)
#define simde_mm_storeu_epi16(mem_addr, a) simde_mm_storeu_si128(mem_addr, a)
#define simde_mm_storeu_epi32(mem_addr, a) simde_mm_storeu_si128(mem_addr, a)
#define simde_mm_storeu_epi64(mem_addr, a) simde_mm_storeu_si128(mem_addr, a)

/* Native-name aliases for the 8- and 16-bit forms; only installed when both
 * the AVX512BW and AVX512VL alias switches are defined. */
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_storeu_epi8
  #define _mm_storeu_epi8(mem_addr, a) simde_mm_storeu_si128(mem_addr, a)
  #undef _mm_storeu_epi16
  #define _mm_storeu_epi16(mem_addr, a) simde_mm_storeu_si128(mem_addr, a)
#endif

/* Native-name aliases for the 32- and 64-bit forms; only installed when both
 * the AVX512F and AVX512VL alias switches are defined. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_storeu_epi32
  #define _mm_storeu_epi32(mem_addr, a) simde_mm_storeu_si128(mem_addr, a)
  #undef _mm_storeu_epi64
  #define _mm_storeu_epi64(mem_addr, a) simde_mm_storeu_si128(mem_addr, a)
#endif
55+
56+
SIMDE_FUNCTION_ATTRIBUTES
57+
void
58+
simde_mm_mask_storeu_epi8 (void * mem_addr, simde__mmask16 k, simde__m128i a) {
59+
#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
60+
_mm_mask_storeu_epi8(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
61+
#else
62+
const simde__m128i src = simde_mm_loadu_epi8(mem_addr);
63+
simde_mm_storeu_epi8(mem_addr, simde_mm_mask_mov_epi8(src, k, a));
64+
#endif
65+
}
66+
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
67+
#undef _mm_mask_storeu_epi8
68+
#define _mm_mask_storeu_epi8(mem_addr, k, a) simde_mm_mask_storeu_epi8(mem_addr, k, a)
69+
#endif
70+
71+
SIMDE_FUNCTION_ATTRIBUTES
72+
void
73+
simde_mm_mask_storeu_epi16 (void * mem_addr, simde__mmask8 k, simde__m128i a) {
74+
#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
75+
_mm_mask_storeu_epi16(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
76+
#else
77+
const simde__m128i src = simde_mm_loadu_epi16(mem_addr);
78+
simde_mm_storeu_epi16(mem_addr, simde_mm_mask_mov_epi16(src, k, a));
79+
#endif
80+
}
81+
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
82+
#undef _mm_mask_storeu_epi16
83+
#define _mm_mask_storeu_epi16(mem_addr, k, a) simde_mm_mask_storeu_epi16(mem_addr, k, a)
84+
#endif
85+
86+
SIMDE_FUNCTION_ATTRIBUTES
87+
void
88+
simde_mm_mask_storeu_epi32 (void * mem_addr, simde__mmask8 k, simde__m128i a) {
89+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
90+
_mm_mask_storeu_epi32(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
91+
#else
92+
const simde__m128i src = simde_mm_loadu_epi32(mem_addr);
93+
simde_mm_storeu_epi32(mem_addr, simde_mm_mask_mov_epi32(src, k, a));
94+
#endif
95+
}
96+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
97+
#undef _mm_mask_storeu_epi32
98+
#define _mm_mask_storeu_epi32(mem_addr, k, a) simde_mm_mask_storeu_epi32(mem_addr, k, a)
99+
#endif
100+
101+
SIMDE_FUNCTION_ATTRIBUTES
102+
void
103+
simde_mm_mask_storeu_epi64 (void * mem_addr, simde__mmask8 k, simde__m128i a) {
104+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
105+
_mm_mask_storeu_epi64(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
106+
#else
107+
const simde__m128i src = simde_mm_loadu_epi64(mem_addr);
108+
simde_mm_storeu_epi64(mem_addr, simde_mm_mask_mov_epi64(src, k, a));
109+
#endif
110+
}
111+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
112+
#undef _mm_mask_storeu_epi64
113+
#define _mm_mask_storeu_epi64(mem_addr, k, a) simde_mm_mask_storeu_epi64(mem_addr, k, a)
114+
#endif
115+
116+
SIMDE_FUNCTION_ATTRIBUTES
117+
void
118+
simde_mm_mask_storeu_ps(void * mem_addr, simde__mmask8 k, simde__m128 a) {
119+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
120+
_mm_mask_storeu_ps(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
121+
#else
122+
const simde__m128 src = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(const simde_float32*, mem_addr));
123+
simde_mm_storeu_ps(HEDLEY_REINTERPRET_CAST(simde_float32*, mem_addr), simde_mm_mask_mov_ps(src, k, a));
124+
#endif
125+
}
126+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
127+
#undef _mm_mask_storeu_ps
128+
#define _mm_mask_storeu_ps(mem_addr, k, a) simde_mm_mask_storeu_ps(mem_addr, k, a)
129+
#endif
130+
131+
SIMDE_FUNCTION_ATTRIBUTES
132+
void
133+
simde_mm_mask_storeu_pd(void * mem_addr, simde__mmask8 k, simde__m128d a) {
134+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
135+
_mm_mask_storeu_pd(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
136+
#else
137+
const simde__m128d src = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(const simde_float64*, mem_addr));
138+
simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(simde_float64*, mem_addr), simde_mm_mask_mov_pd(src, k, a));
139+
#endif
140+
}
141+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
142+
#undef _mm_mask_storeu_pd
143+
#define _mm_mask_storeu_pd(mem_addr, k, a) simde_mm_mask_storeu_pd(mem_addr, k, a)
144+
#endif
145+
38146
/* Element-typed unaligned 256-bit integer stores; each spelling forwards
 * unchanged to simde_mm256_storeu_si256. */
#define simde_mm256_storeu_epi8(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a)
39147
#define simde_mm256_storeu_epi16(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a)
40148
#define simde_mm256_storeu_epi32(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a)
@@ -112,6 +220,36 @@ simde_mm256_mask_storeu_epi64 (void * mem_addr, simde__mmask8 k, simde__m256i a)
112220
#define _mm256_mask_storeu_epi64(mem_addr, k, a) simde_mm256_mask_storeu_epi64(mem_addr, k, a)
113221
#endif
114222

223+
SIMDE_FUNCTION_ATTRIBUTES
224+
void
225+
simde_mm256_mask_storeu_ps (void * mem_addr, simde__mmask8 k, simde__m256 a) {
226+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
227+
_mm256_mask_storeu_ps(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
228+
#else
229+
const simde__m256 src = simde_mm256_loadu_ps(HEDLEY_REINTERPRET_CAST(const simde_float32*, mem_addr));
230+
simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(simde_float32*, mem_addr), simde_mm256_mask_mov_ps(src, k, a));
231+
#endif
232+
}
233+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
234+
#undef _mm256_mask_storeu_ps
235+
#define _mm256_mask_storeu_ps(mem_addr, k, a) simde_mm256_mask_storeu_ps(mem_addr, k, a)
236+
#endif
237+
238+
SIMDE_FUNCTION_ATTRIBUTES
239+
void
240+
simde_mm256_mask_storeu_pd (void * mem_addr, simde__mmask8 k, simde__m256d a) {
241+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
242+
_mm256_mask_storeu_pd(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
243+
#else
244+
const simde__m256d src = simde_mm256_loadu_pd(HEDLEY_REINTERPRET_CAST(const simde_float64*, mem_addr));
245+
simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(simde_float64*, mem_addr), simde_mm256_mask_mov_pd(src, k, a));
246+
#endif
247+
}
248+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
249+
#undef _mm256_mask_storeu_pd
250+
#define _mm256_mask_storeu_pd(mem_addr, k, a) simde_mm256_mask_storeu_pd(mem_addr, k, a)
251+
#endif
252+
115253
SIMDE_FUNCTION_ATTRIBUTES
116254
void
117255
simde_mm512_storeu_ps (void * mem_addr, simde__m512 a) {
@@ -182,6 +320,21 @@ simde_mm512_storeu_si512 (void * mem_addr, simde__m512i a) {
182320
#define _mm512_storeu_epi64(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a)
183321
#endif
184322

323+
SIMDE_FUNCTION_ATTRIBUTES
324+
void
325+
simde_mm512_mask_storeu_epi8 (void * mem_addr, simde__mmask64 k, simde__m512i a) {
326+
#if defined(SIMDE_X86_AVX512BW_NATIVE)
327+
_mm512_mask_storeu_epi8(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a);
328+
#else
329+
const simde__m512i src = simde_mm512_loadu_epi8(mem_addr);
330+
simde_mm512_storeu_epi8(mem_addr, simde_mm512_mask_mov_epi8(src, k, a));
331+
#endif
332+
}
333+
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
334+
#undef _mm512_mask_storeu_epi8
335+
#define _mm512_mask_storeu_epi8(mem_addr, k, a) simde_mm512_mask_storeu_epi8(mem_addr, k, a)
336+
#endif
337+
185338
SIMDE_FUNCTION_ATTRIBUTES
186339
void
187340
simde_mm512_mask_storeu_epi16 (void * mem_addr, simde__mmask32 k, simde__m512i a) {

0 commit comments

Comments
 (0)