Skip to content

Commit d73a011

Browse files
committed
test-suite: add avx512 tests with move-load-store intrinsics
Summary: Here is the next bunch of avx512 tests. In each test we do some load, store or move operations and also check result. Reviewers: MatzeB, craig.topper, zvi, RKSimon Reviewed By: RKSimon Subscribers: mgorny, llvm-commits Differential Revision: https://reviews.llvm.org/D51599 llvm-svn: 350816
1 parent 7dc3899 commit d73a011

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+5257
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
list(APPEND CPPFLAGS -I ${CMAKE_SOURCE_DIR}/${VECTOR_MAIN_DIR})
2+
list(APPEND LDFLAGS -lm)
3+
list(APPEND CFLAGS "-march=${X86CPU_ARCH}")
4+
list(APPEND CFLAGS -fms-extensions)
5+
llvm_singlesource(PREFIX "Vector-AVX512BW-")
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# SingleSource/UnitTests/Vector/AVX512BW/Makefile
2+
3+
DIRS =
4+
LEVEL = ../../../..
5+
CFLAGS += -fms-extensions -march=native -mavx512bw -I${SourceDir}/..
6+
LDFLAGS += -lm
7+
8+
include $(LEVEL)/SingleSource/Makefile.singlesrc
9+
10+
TARGET_FLAGS += -march=native -mavx512bw
11+
LCCFLAGS += -march=native -mavx512bw
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
/*
2+
* Test load and store instructions.
3+
* Here we check for _mm512_[mask|maskz]_[loadu|storeu] intrinsics.
4+
*/
5+
#include "m512_test_util.h"
6+
7+
V512 src_vals[2];
8+
V512 all_ones;
9+
volatile int vol0 = 0;
10+
11+
void NOINLINE init() {
12+
volatile int i;
13+
int j;
14+
15+
for (i = 0; i < sizeof(src_vals) / sizeof(src_vals[0]); i++) {
16+
for (j = 0; j < 16; j++) {
17+
src_vals[i].s32[j] = 16 * i + j;
18+
}
19+
}
20+
21+
for (i = 0; i < 16; i++) {
22+
all_ones.s32[i] = -1;
23+
}
24+
}
25+
26+
void NOINLINE do_loadu() {
27+
V512 res;
28+
V512 expected;
29+
__mmask64 k64 = 0xfbde79feffeeffee;
30+
__mmask32 k32 = 0xbfde79fe;
31+
__mmask16 k16 = 0xbfde;
32+
__mmask8 k8 = 0xaf;
33+
volatile int i;
34+
signed char *p8 = &src_vals[0].s8[0];
35+
short *p16 = &src_vals[0].s16[0];
36+
int *p = &src_vals[0].s32[0];
37+
__int64 *p64 = &src_vals[0].s64[0];
38+
39+
res.zmm = _mm512_loadu_ps(&src_vals[0].s32[1]);
40+
for (i = 0; i < 16; i++) {
41+
expected.s32[i] = p[i + 1];
42+
}
43+
check_equal_nd(&res, &expected, 16, "_mm512_loadu_ps", __LINE__);
44+
45+
res.zmmd = _mm512_loadu_pd(&src_vals[0].s32[2]);
46+
for (i = 0; i < 16; i++) {
47+
expected.s32[i] = p[i + 2];
48+
}
49+
check_equal_nd(&res, &expected, 16, "_mm512_loadu_pd", __LINE__);
50+
51+
res.zmmi = _mm512_loadu_si512(&src_vals[0].s32[3]);
52+
for (i = 0; i < 16; i++) {
53+
expected.s32[i] = p[i + 3];
54+
}
55+
check_equal_nd(&res, &expected, 16, "_mm512_loadu_si512", __LINE__);
56+
57+
/* Now the write-masked versions. */
58+
59+
res = all_ones;
60+
expected = all_ones;
61+
res.zmm = _mm512_mask_loadu_ps(res.zmm, k16, &src_vals[0].s32[5]);
62+
for (i = 0; i < 16; i++) {
63+
if ((1 << i) & k16) {
64+
expected.s32[i] = p[i + 5];
65+
}
66+
}
67+
check_equal_nd(&res, &expected, 16, "_mm512_mask_loadu_ps", __LINE__);
68+
69+
k64 += vol0;
70+
res = all_ones;
71+
expected = all_ones;
72+
res.zmmi = _mm512_mask_loadu_epi8(res.zmmi, k64, &src_vals[0].s8[7]);
73+
for (i = 0; i < 64; i++) {
74+
if (((__mmask64)1 << i) & k64) {
75+
expected.s8[i] = p8[i + 7];
76+
}
77+
}
78+
check_equal_nd(&res, &expected, 16, "_mm512_mask_loadu_epi8", __LINE__);
79+
80+
k64 += vol0;
81+
res = all_ones;
82+
expected.zmmi = _mm512_setzero_epi32();
83+
res.zmmi = _mm512_maskz_loadu_epi8(k64, &src_vals[0].s8[9]);
84+
for (i = 0; i < 64; i++) {
85+
if (((__mmask64)1 << i) & k64) {
86+
expected.s8[i] = p8[i + 9];
87+
}
88+
}
89+
check_equal_nd(&res, &expected, 16, "_mm512_maskz_loadu_epi8", __LINE__);
90+
91+
k32 += vol0;
92+
res = all_ones;
93+
expected = all_ones;
94+
res.zmmi = _mm512_mask_loadu_epi16(res.zmmi, k32, &src_vals[0].s16[5]);
95+
for (i = 0; i < 32; i++) {
96+
if ((1 << i) & k32) {
97+
expected.s16[i] = p16[i + 5];
98+
}
99+
}
100+
check_equal_nd(&res, &expected, 16, "_mm512_mask_loadu_epi16", __LINE__);
101+
102+
k32 += vol0;
103+
res = all_ones;
104+
expected.zmmi = _mm512_setzero_epi32();
105+
res.zmmi = _mm512_maskz_loadu_epi16(k32, &src_vals[0].s16[3]);
106+
for (i = 0; i < 32; i++) {
107+
if ((1 << i) & k32) {
108+
expected.s16[i] = p16[i + 3];
109+
}
110+
}
111+
check_equal_nd(&res, &expected, 16, "_mm512_maskz_loadu_epi16", __LINE__);
112+
113+
k16 = 0xabcd + vol0;
114+
res = all_ones;
115+
expected = all_ones;
116+
res.zmmi = _mm512_mask_loadu_epi32(res.zmmi, k16, &src_vals[0].s32[7]);
117+
for (i = 0; i < 16; i++) {
118+
if ((1 << i) & k16) {
119+
expected.s32[i] = p[i + 7];
120+
}
121+
}
122+
check_equal_nd(&res, &expected, 16, "_mm512_mask_loadu_epi32", __LINE__);
123+
124+
res = all_ones;
125+
expected = all_ones;
126+
res.zmmd = _mm512_mask_loadu_pd(res.zmmd, k8, &src_vals[0].s64[2]);
127+
for (i = 0; i < 8; i++) {
128+
if ((1 << i) & k8) {
129+
expected.s64[i] = p64[i + 2];
130+
}
131+
}
132+
check_equal_nd(&res, &expected, 16, "_mm512_mask_loadu_pd", __LINE__);
133+
134+
k8 = 0x79 + vol0;
135+
res = all_ones;
136+
expected = all_ones;
137+
res.zmmi = _mm512_mask_loadu_epi64(res.zmmi, k8, &src_vals[0].s64[3]);
138+
for (i = 0; i < 8; i++) {
139+
if ((1 << i) & k8) {
140+
expected.s64[i] = p64[i + 3];
141+
}
142+
}
143+
check_equal_nd(&res, &expected, 16, "_mm512_mask_loadu_epi64", __LINE__);
144+
}
145+
146+
void NOINLINE do_storeu() {
147+
V512 src;
148+
V512 expected;
149+
volatile int i;
150+
static V512 dst_vals[2];
151+
__mmask64 k64 = 0xabcdffffffffeebd;
152+
__mmask32 k32 = 0xfefebdbd;
153+
__mmask16 k16 = 0x79ab;
154+
__mmask8 k8 = 0xea;
155+
156+
src.zmmi = src_vals[0].zmmi;
157+
158+
dst_vals[0].zmm = _mm512_setzero_ps();
159+
dst_vals[1].zmm = _mm512_setzero_ps();
160+
_mm512_storeu_si512(&dst_vals[0].s32[1], src.zmmi);
161+
check_equal_nd(&dst_vals[0].s32[1], &src_vals, 16, "_mm512_storeu_si512",
162+
__LINE__);
163+
164+
dst_vals[0].zmm = _mm512_setzero_ps();
165+
dst_vals[1].zmm = _mm512_setzero_ps();
166+
_mm512_storeu_ps(&dst_vals[0].s32[2], src.zmm);
167+
check_equal_nd(&dst_vals[0].s32[2], &src_vals, 16, "_mm512_storeu_pd",
168+
__LINE__);
169+
170+
dst_vals[0].zmm = _mm512_setzero_ps();
171+
dst_vals[1].zmm = _mm512_setzero_ps();
172+
_mm512_storeu_pd(&dst_vals[0].s32[4], src.zmmd);
173+
check_equal_nd(&dst_vals[0].s32[4], &src_vals, 16, "_mm512_storeu_pd",
174+
__LINE__);
175+
176+
/* Now the write-masked versions. */
177+
178+
dst_vals[0] = all_ones;
179+
dst_vals[1] = all_ones;
180+
_mm512_mask_storeu_epi8(&dst_vals[0].s8[3], k64, src.zmmi);
181+
expected = all_ones;
182+
for (i = 0; i < 64; i++) {
183+
if (((__mmask64)1 << i) & k64) {
184+
expected.s8[i] = src.s8[i];
185+
}
186+
}
187+
check_equal_nd(&dst_vals[0].s8[3], &expected, 16, "_mm512_mask_storeu_epi8",
188+
__LINE__);
189+
190+
dst_vals[0] = all_ones;
191+
dst_vals[1] = all_ones;
192+
_mm512_mask_storeu_epi16(&dst_vals[0].s16[3], k32, src.zmmi);
193+
expected = all_ones;
194+
for (i = 0; i < 32; i++) {
195+
if (((__mmask32)1 << i) & k32) {
196+
expected.s16[i] = src.s16[i];
197+
}
198+
}
199+
check_equal_nd(&dst_vals[0].s16[3], &expected, 16, "_mm512_mask_storeu_epi16",
200+
__LINE__);
201+
202+
dst_vals[0] = all_ones;
203+
dst_vals[1] = all_ones;
204+
_mm512_mask_storeu_epi32(&dst_vals[0].s32[1], k16, src.zmmi);
205+
expected = all_ones;
206+
for (i = 0; i < 16; i++) {
207+
if ((1 << i) & k16) {
208+
expected.s32[i] = src.s32[i];
209+
}
210+
}
211+
check_equal_nd(&dst_vals[0].s32[1], &expected, 16, "_mm512_mask_storeu_epi32",
212+
__LINE__);
213+
214+
k16 = 0xdcba + vol0;
215+
dst_vals[0] = all_ones;
216+
dst_vals[1] = all_ones;
217+
_mm512_mask_storeu_ps(&dst_vals[0].s32[3], k16, src.zmm);
218+
expected = all_ones;
219+
for (i = 0; i < 16; i++) {
220+
if ((1 << i) & k16) {
221+
expected.s32[i] = src.s32[i];
222+
}
223+
}
224+
check_equal_nd(&dst_vals[0].s32[3], &expected, 16, "_mm512_mask_storeu_ps",
225+
__LINE__);
226+
227+
k8 = 0xbc;
228+
dst_vals[0] = all_ones;
229+
dst_vals[1] = all_ones;
230+
_mm512_mask_storeu_pd(&dst_vals[0].s64[3], k8, src.zmmd);
231+
expected = all_ones;
232+
for (i = 0; i < 8; i++) {
233+
if ((1 << i) & k8) {
234+
expected.s64[i] = src.s64[i];
235+
}
236+
}
237+
check_equal_nd(&dst_vals[0].s64[3], &expected, 16, "_mm512_mask_storeu_pd",
238+
__LINE__);
239+
240+
k8 = 0xcb + vol0;
241+
dst_vals[0] = all_ones;
242+
dst_vals[1] = all_ones;
243+
_mm512_mask_storeu_epi64(&dst_vals[0].s64[1], k8, src.zmmi);
244+
expected = all_ones;
245+
for (i = 0; i < 8; i++) {
246+
if ((1 << i) & k8) {
247+
expected.s64[i] = src.s64[i];
248+
}
249+
}
250+
check_equal_nd(&dst_vals[0].s64[1], &expected, 16, "_mm512_mask_storeu_epi64",
251+
__LINE__);
252+
}
253+
254+
int main(int argc, char *argv[]) {
255+
init();
256+
257+
do_loadu();
258+
do_storeu();
259+
260+
if (n_errs != 0) {
261+
printf("FAILED\n");
262+
return 1;
263+
}
264+
265+
printf("PASSED\n");
266+
return 0;
267+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
PASSED
2+
exit 0

0 commit comments

Comments
 (0)