Skip to content

Commit ef702b0

Browse files
authored
x86/avx512/add: implemented _mm{,256}_mask{,z}_add_{epi8,ps,pd,sd} (#1364)
1 parent 5926ee9 commit ef702b0

File tree

2 files changed

+1311
-0
lines changed

2 files changed

+1311
-0
lines changed

simde/x86/avx512/add.h

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,62 @@ simde_mm_maskz_add_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) {
148148
#define _mm_maskz_add_epi64(k, a, b) simde_mm_maskz_add_epi64(k, a, b)
149149
#endif
150150

151+
SIMDE_FUNCTION_ATTRIBUTES
152+
simde__m128
153+
simde_mm_mask_add_ps(simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) {
154+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
155+
return _mm_mask_add_ps(src, k, a, b);
156+
#else
157+
return simde_mm_mask_mov_ps(src, k, simde_mm_add_ps(a, b));
158+
#endif
159+
}
160+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
161+
#undef _mm_mask_add_ps
162+
#define _mm_mask_add_ps(src, k, a, b) simde_mm_mask_add_ps(src, k, a, b)
163+
#endif
164+
165+
SIMDE_FUNCTION_ATTRIBUTES
166+
simde__m128
167+
simde_mm_maskz_add_ps(simde__mmask8 k, simde__m128 a, simde__m128 b) {
168+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
169+
return _mm_maskz_add_ps(k, a, b);
170+
#else
171+
return simde_mm_maskz_mov_ps(k, simde_mm_add_ps(a, b));
172+
#endif
173+
}
174+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
175+
#undef _mm_maskz_add_ps
176+
#define _mm_maskz_add_ps(k, a, b) simde_mm_maskz_add_ps(k, a, b)
177+
#endif
178+
179+
SIMDE_FUNCTION_ATTRIBUTES
180+
simde__m128d
181+
simde_mm_mask_add_pd(simde__m128d src, simde__mmask8 k, simde__m128d a, simde__m128d b) {
182+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
183+
return _mm_mask_add_pd(src, k, a, b);
184+
#else
185+
return simde_mm_mask_mov_pd(src, k, simde_mm_add_pd(a, b));
186+
#endif
187+
}
188+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
189+
#undef _mm_mask_add_pd
190+
#define _mm_mask_add_pd(src, k, a, b) simde_mm_mask_add_pd(src, k, a, b)
191+
#endif
192+
193+
SIMDE_FUNCTION_ATTRIBUTES
194+
simde__m128d
195+
simde_mm_maskz_add_pd(simde__mmask8 k, simde__m128d a, simde__m128d b) {
196+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
197+
return _mm_maskz_add_pd(k, a, b);
198+
#else
199+
return simde_mm_maskz_mov_pd(k, simde_mm_add_pd(a, b));
200+
#endif
201+
}
202+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
203+
#undef _mm_maskz_add_pd
204+
#define _mm_maskz_add_pd(k, a, b) simde_mm_maskz_add_pd(k, a, b)
205+
#endif
206+
151207
SIMDE_FUNCTION_ATTRIBUTES
152208
simde__m128
153209
simde_mm_mask_add_ss(simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) {
@@ -195,6 +251,81 @@ simde_mm_maskz_add_ss(simde__mmask8 k, simde__m128 a, simde__m128 b) {
195251
#define _mm_maskz_add_ss(k, a, b) simde_mm_maskz_add_ss(k, a, b)
196252
#endif
197253

254+
SIMDE_FUNCTION_ATTRIBUTES
255+
simde__m128d
256+
simde_mm_mask_add_sd(simde__m128d src, simde__mmask8 k, simde__m128d a, simde__m128d b) {
257+
#if defined(SIMDE_X86_AVX512F_NATIVE)
258+
return _mm_mask_add_sd(src, k, a, b);
259+
#elif 1
260+
simde__m128d_private
261+
src_ = simde__m128d_to_private(src),
262+
a_ = simde__m128d_to_private(a),
263+
b_ = simde__m128d_to_private(b),
264+
r_ = simde__m128d_to_private(a);
265+
266+
r_.f64[0] = (k & 1) ? (a_.f64[0] + b_.f64[0]) : src_.f64[0];
267+
268+
return simde__m128d_from_private(r_);
269+
#else
270+
return simde_mm_move_sd(a, simde_mm_mask_mov_pd(src, k, simde_mm_add_pd(a, b)));
271+
#endif
272+
}
273+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
274+
#undef _mm_mask_add_sd
275+
#define _mm_mask_add_sd(src, k, a, b) simde_mm_mask_add_sd(src, k, a, b)
276+
#endif
277+
278+
SIMDE_FUNCTION_ATTRIBUTES
279+
simde__m128d
280+
simde_mm_maskz_add_sd(simde__mmask8 k, simde__m128d a, simde__m128d b) {
281+
#if defined(SIMDE_X86_AVX512F_NATIVE)
282+
return _mm_maskz_add_sd(k, a, b);
283+
#elif 1
284+
simde__m128d_private
285+
a_ = simde__m128d_to_private(a),
286+
b_ = simde__m128d_to_private(b),
287+
r_ = simde__m128d_to_private(a);
288+
289+
r_.f64[0] = (k & 1) ? (a_.f64[0] + b_.f64[0]) : 0.0;
290+
291+
return simde__m128d_from_private(r_);
292+
#else
293+
return simde_mm_move_sd(a, simde_mm_maskz_mov_pd(k, simde_mm_add_pd(a, b)));
294+
#endif
295+
}
296+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
297+
#undef _mm_maskz_add_sd
298+
#define _mm_maskz_add_sd(k, a, b) simde_mm_maskz_add_sd(k, a, b)
299+
#endif
300+
301+
SIMDE_FUNCTION_ATTRIBUTES
302+
simde__m256i
303+
simde_mm256_mask_add_epi8(simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) {
304+
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE)
305+
return _mm256_mask_add_epi8(src, k, a, b);
306+
#else
307+
return simde_mm256_mask_mov_epi8(src, k, simde_mm256_add_epi8(a, b));
308+
#endif
309+
}
310+
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
311+
#undef _mm256_mask_add_epi8
312+
#define _mm256_mask_add_epi8(src, k, a, b) simde_mm256_mask_add_epi8(src, k, a, b)
313+
#endif
314+
315+
SIMDE_FUNCTION_ATTRIBUTES
316+
simde__m256i
317+
simde_mm256_maskz_add_epi8(simde__mmask32 k, simde__m256i a, simde__m256i b) {
318+
#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE)
319+
return _mm256_maskz_add_epi8(k, a, b);
320+
#else
321+
return simde_mm256_maskz_mov_epi8(k, simde_mm256_add_epi8(a, b));
322+
#endif
323+
}
324+
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
325+
#undef _mm256_maskz_add_epi8
326+
#define _mm256_maskz_add_epi8(k, a, b) simde_mm256_maskz_add_epi8(k, a, b)
327+
#endif
328+
198329
SIMDE_FUNCTION_ATTRIBUTES
199330
simde__m256i
200331
simde_mm256_mask_add_epi16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) {
@@ -279,6 +410,62 @@ simde_mm256_maskz_add_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) {
279410
#define _mm256_maskz_add_epi64(k, a, b) simde_mm256_maskz_add_epi64(k, a, b)
280411
#endif
281412

413+
SIMDE_FUNCTION_ATTRIBUTES
414+
simde__m256
415+
simde_mm256_mask_add_ps(simde__m256 src, simde__mmask8 k, simde__m256 a, simde__m256 b) {
416+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
417+
return _mm256_mask_add_ps(src, k, a, b);
418+
#else
419+
return simde_mm256_mask_mov_ps(src, k, simde_mm256_add_ps(a, b));
420+
#endif
421+
}
422+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
423+
#undef _mm256_mask_add_ps
424+
#define _mm256_mask_add_ps(src, k, a, b) simde_mm256_mask_add_ps(src, k, a, b)
425+
#endif
426+
427+
SIMDE_FUNCTION_ATTRIBUTES
428+
simde__m256
429+
simde_mm256_maskz_add_ps(simde__mmask8 k, simde__m256 a, simde__m256 b) {
430+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
431+
return _mm256_maskz_add_ps(k, a, b);
432+
#else
433+
return simde_mm256_maskz_mov_ps(k, simde_mm256_add_ps(a, b));
434+
#endif
435+
}
436+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
437+
#undef _mm256_maskz_add_ps
438+
#define _mm256_maskz_add_ps(k, a, b) simde_mm256_maskz_add_ps(k, a, b)
439+
#endif
440+
441+
SIMDE_FUNCTION_ATTRIBUTES
442+
simde__m256d
443+
simde_mm256_mask_add_pd(simde__m256d src, simde__mmask8 k, simde__m256d a, simde__m256d b) {
444+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
445+
return _mm256_mask_add_pd(src, k, a, b);
446+
#else
447+
return simde_mm256_mask_mov_pd(src, k, simde_mm256_add_pd(a, b));
448+
#endif
449+
}
450+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
451+
#undef _mm256_mask_add_pd
452+
#define _mm256_mask_add_pd(src, k, a, b) simde_mm256_mask_add_pd(src, k, a, b)
453+
#endif
454+
455+
SIMDE_FUNCTION_ATTRIBUTES
456+
simde__m256d
457+
simde_mm256_maskz_add_pd(simde__mmask8 k, simde__m256d a, simde__m256d b) {
458+
#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
459+
return _mm256_maskz_add_pd(k, a, b);
460+
#else
461+
return simde_mm256_maskz_mov_pd(k, simde_mm256_add_pd(a, b));
462+
#endif
463+
}
464+
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
465+
#undef _mm256_maskz_add_pd
466+
#define _mm256_maskz_add_pd(k, a, b) simde_mm256_maskz_add_pd(k, a, b)
467+
#endif
468+
282469
SIMDE_FUNCTION_ATTRIBUTES
283470
simde__m512i
284471
simde_mm512_add_epi8 (simde__m512i a, simde__m512i b) {

0 commit comments

Comments
 (0)