Skip to content

Commit 8742a54

Browse files
RKSimon and kcloudy0717
authored and committed
[X86] Add tests showing failure to concat fp rounding intrinsics together. (llvm#170108)
1 parent 779aa75 commit 8742a54

File tree

6 files changed

+1019
-0
lines changed

6 files changed

+1019
-0
lines changed
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512
6+
7+
define <4 x double> @concat_ceil_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) {
8+
; SSE-LABEL: concat_ceil_v4f64_v2f64:
9+
; SSE: # %bb.0:
10+
; SSE-NEXT: roundpd $10, %xmm0, %xmm0
11+
; SSE-NEXT: roundpd $10, %xmm1, %xmm1
12+
; SSE-NEXT: retq
13+
;
14+
; AVX-LABEL: concat_ceil_v4f64_v2f64:
15+
; AVX: # %bb.0:
16+
; AVX-NEXT: vroundpd $10, %xmm0, %xmm0
17+
; AVX-NEXT: vroundpd $10, %xmm1, %xmm1
18+
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
19+
; AVX-NEXT: retq
20+
%v0 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a0)
21+
%v1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a1)
22+
%res = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
23+
ret <4 x double> %res
24+
}
25+
26+
define <8 x float> @concat_ceil_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
27+
; SSE-LABEL: concat_ceil_v8f32_v4f32:
28+
; SSE: # %bb.0:
29+
; SSE-NEXT: roundps $10, %xmm0, %xmm0
30+
; SSE-NEXT: roundps $10, %xmm1, %xmm1
31+
; SSE-NEXT: retq
32+
;
33+
; AVX-LABEL: concat_ceil_v8f32_v4f32:
34+
; AVX: # %bb.0:
35+
; AVX-NEXT: vroundps $10, %xmm0, %xmm0
36+
; AVX-NEXT: vroundps $10, %xmm1, %xmm1
37+
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
38+
; AVX-NEXT: retq
39+
; NOTE(review): float variant of the test above -- the two v4f32 ceils are not
; merged into one 256-bit vroundps; the halves are concatenated after rounding.
%v0 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a0)
40+
%v1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a1)
41+
%res = shufflevector <4 x float> %v0, <4 x float> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
42+
ret <8 x float> %res
43+
}
44+
45+
define <8 x double> @concat_ceil_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3) {
46+
; SSE-LABEL: concat_ceil_v8f64_v2f64:
47+
; SSE: # %bb.0:
48+
; SSE-NEXT: roundpd $10, %xmm0, %xmm0
49+
; SSE-NEXT: roundpd $10, %xmm1, %xmm1
50+
; SSE-NEXT: roundpd $10, %xmm2, %xmm2
51+
; SSE-NEXT: roundpd $10, %xmm3, %xmm3
52+
; SSE-NEXT: retq
53+
;
54+
; AVX1OR2-LABEL: concat_ceil_v8f64_v2f64:
55+
; AVX1OR2: # %bb.0:
56+
; AVX1OR2-NEXT: vroundpd $10, %xmm0, %xmm0
57+
; AVX1OR2-NEXT: vroundpd $10, %xmm1, %xmm1
58+
; AVX1OR2-NEXT: vroundpd $10, %xmm2, %xmm2
59+
; AVX1OR2-NEXT: vroundpd $10, %xmm3, %xmm3
60+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
61+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
62+
; AVX1OR2-NEXT: retq
63+
;
64+
; AVX512-LABEL: concat_ceil_v8f64_v2f64:
65+
; AVX512: # %bb.0:
66+
; AVX512-NEXT: vroundpd $10, %xmm0, %xmm0
67+
; AVX512-NEXT: vroundpd $10, %xmm1, %xmm1
68+
; AVX512-NEXT: vroundpd $10, %xmm2, %xmm2
69+
; AVX512-NEXT: vroundpd $10, %xmm3, %xmm3
70+
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
71+
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
72+
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
73+
; AVX512-NEXT: retq
74+
; NOTE(review): four v2f64 ceils are each rounded at 128 bits and the result is
; built with three inserts (AVX512) instead of wider rounding ops on the
; concatenated vectors -- the missed fold llvm#170108 is tracking.
%v0 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a0)
75+
%v1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a1)
76+
%v2 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a2)
77+
%v3 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a3)
78+
%r01 = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
79+
%r23 = shufflevector <2 x double> %v2, <2 x double> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
80+
%res = shufflevector <4 x double> %r01, <4 x double> %r23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
81+
ret <8 x double> %res
82+
}
83+
84+
define <16 x float> @concat_ceil_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> %a3) {
85+
; SSE-LABEL: concat_ceil_v16f32_v4f32:
86+
; SSE: # %bb.0:
87+
; SSE-NEXT: roundps $10, %xmm0, %xmm0
88+
; SSE-NEXT: roundps $10, %xmm1, %xmm1
89+
; SSE-NEXT: roundps $10, %xmm2, %xmm2
90+
; SSE-NEXT: roundps $10, %xmm3, %xmm3
91+
; SSE-NEXT: retq
92+
;
93+
; AVX1OR2-LABEL: concat_ceil_v16f32_v4f32:
94+
; AVX1OR2: # %bb.0:
95+
; AVX1OR2-NEXT: vroundps $10, %xmm0, %xmm0
96+
; AVX1OR2-NEXT: vroundps $10, %xmm1, %xmm1
97+
; AVX1OR2-NEXT: vroundps $10, %xmm2, %xmm2
98+
; AVX1OR2-NEXT: vroundps $10, %xmm3, %xmm3
99+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
100+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
101+
; AVX1OR2-NEXT: retq
102+
;
103+
; AVX512-LABEL: concat_ceil_v16f32_v4f32:
104+
; AVX512: # %bb.0:
105+
; AVX512-NEXT: vroundps $10, %xmm0, %xmm0
106+
; AVX512-NEXT: vroundps $10, %xmm1, %xmm1
107+
; AVX512-NEXT: vroundps $10, %xmm2, %xmm2
108+
; AVX512-NEXT: vroundps $10, %xmm3, %xmm3
109+
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
110+
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
111+
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
112+
; AVX512-NEXT: retq
113+
; NOTE(review): float variant of the 4-input test -- four 128-bit roundps ops
; plus three inserts rather than rounding the concatenated vector(s) once.
%v0 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a0)
114+
%v1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a1)
115+
%v2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a2)
116+
%v3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a3)
117+
%r01 = shufflevector <4 x float> %v0, <4 x float> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
118+
%r23 = shufflevector <4 x float> %v2, <4 x float> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
119+
%res = shufflevector <8 x float> %r01, <8 x float> %r23, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
120+
ret <16 x float> %res
121+
}
122+
123+
define <8 x double> @concat_ceil_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1) {
124+
; SSE-LABEL: concat_ceil_v8f64_v4f64:
125+
; SSE: # %bb.0:
126+
; SSE-NEXT: roundpd $10, %xmm0, %xmm0
127+
; SSE-NEXT: roundpd $10, %xmm1, %xmm1
128+
; SSE-NEXT: roundpd $10, %xmm2, %xmm2
129+
; SSE-NEXT: roundpd $10, %xmm3, %xmm3
130+
; SSE-NEXT: retq
131+
;
132+
; AVX1OR2-LABEL: concat_ceil_v8f64_v4f64:
133+
; AVX1OR2: # %bb.0:
134+
; AVX1OR2-NEXT: vroundpd $10, %ymm0, %ymm0
135+
; AVX1OR2-NEXT: vroundpd $10, %ymm1, %ymm1
136+
; AVX1OR2-NEXT: retq
137+
;
138+
; AVX512-LABEL: concat_ceil_v8f64_v4f64:
139+
; AVX512: # %bb.0:
140+
; AVX512-NEXT: vroundpd $10, %ymm0, %ymm0
141+
; AVX512-NEXT: vroundpd $10, %ymm1, %ymm1
142+
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
143+
; AVX512-NEXT: retq
144+
; NOTE(review): 256-bit inputs -- AVX512 still rounds each ymm half and then
; concatenates with vinsertf64x4 instead of a single 512-bit rounding of the
; concatenated value.
%v0 = call <4 x double> @llvm.ceil.v4f64(<4 x double> %a0)
145+
%v1 = call <4 x double> @llvm.ceil.v4f64(<4 x double> %a1)
146+
%res = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
147+
ret <8 x double> %res
148+
}
149+
150+
define <16 x float> @concat_ceil_v16f32_v8f32(<8 x float> %a0, <8 x float> %a1) {
151+
; SSE-LABEL: concat_ceil_v16f32_v8f32:
152+
; SSE: # %bb.0:
153+
; SSE-NEXT: roundps $10, %xmm0, %xmm0
154+
; SSE-NEXT: roundps $10, %xmm1, %xmm1
155+
; SSE-NEXT: roundps $10, %xmm2, %xmm2
156+
; SSE-NEXT: roundps $10, %xmm3, %xmm3
157+
; SSE-NEXT: retq
158+
;
159+
; AVX1OR2-LABEL: concat_ceil_v16f32_v8f32:
160+
; AVX1OR2: # %bb.0:
161+
; AVX1OR2-NEXT: vroundps $10, %ymm0, %ymm0
162+
; AVX1OR2-NEXT: vroundps $10, %ymm1, %ymm1
163+
; AVX1OR2-NEXT: retq
164+
;
165+
; AVX512-LABEL: concat_ceil_v16f32_v8f32:
166+
; AVX512: # %bb.0:
167+
; AVX512-NEXT: vroundps $10, %ymm0, %ymm0
168+
; AVX512-NEXT: vroundps $10, %ymm1, %ymm1
169+
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
170+
; AVX512-NEXT: retq
171+
; NOTE(review): float variant of the 256-bit test -- AVX512 rounds per-ymm and
; concatenates rather than folding into one 512-bit rounding op.
%v0 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %a0)
172+
%v1 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %a1)
173+
%res = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
174+
ret <16 x float> %res
175+
}
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512
6+
7+
define <4 x double> @concat_nearbyint_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) {
8+
; SSE-LABEL: concat_nearbyint_v4f64_v2f64:
9+
; SSE: # %bb.0:
10+
; SSE-NEXT: roundpd $12, %xmm0, %xmm0
11+
; SSE-NEXT: roundpd $12, %xmm1, %xmm1
12+
; SSE-NEXT: retq
13+
;
14+
; AVX-LABEL: concat_nearbyint_v4f64_v2f64:
15+
; AVX: # %bb.0:
16+
; AVX-NEXT: vroundpd $12, %xmm0, %xmm0
17+
; AVX-NEXT: vroundpd $12, %xmm1, %xmm1
18+
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
19+
; AVX-NEXT: retq
20+
; NOTE(review): nearbyint counterpart of the ceil tests -- each 128-bit input
; is rounded separately and rejoined with vinsertf128; llvm#170108 adds this to
; capture the missed concat(nearbyint(x),nearbyint(y)) ->
; nearbyint(concat(x,y)) fold. roundpd imm $12 = round using current MXCSR
; mode with inexact suppressed (nearbyint) -- per Intel SDM, confirm.
%v0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a0)
21+
%v1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a1)
22+
%res = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
23+
ret <4 x double> %res
24+
}
25+
26+
define <8 x float> @concat_nearbyint_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
27+
; SSE-LABEL: concat_nearbyint_v8f32_v4f32:
28+
; SSE: # %bb.0:
29+
; SSE-NEXT: roundps $12, %xmm0, %xmm0
30+
; SSE-NEXT: roundps $12, %xmm1, %xmm1
31+
; SSE-NEXT: retq
32+
;
33+
; AVX-LABEL: concat_nearbyint_v8f32_v4f32:
34+
; AVX: # %bb.0:
35+
; AVX-NEXT: vroundps $12, %xmm0, %xmm0
36+
; AVX-NEXT: vroundps $12, %xmm1, %xmm1
37+
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
38+
; AVX-NEXT: retq
39+
; NOTE(review): float variant -- the two v4f32 nearbyints are not merged into
; one 256-bit vroundps; the halves are concatenated after rounding.
%v0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a0)
40+
%v1 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a1)
41+
%res = shufflevector <4 x float> %v0, <4 x float> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
42+
ret <8 x float> %res
43+
}
44+
45+
define <8 x double> @concat_nearbyint_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3) {
46+
; SSE-LABEL: concat_nearbyint_v8f64_v2f64:
47+
; SSE: # %bb.0:
48+
; SSE-NEXT: roundpd $12, %xmm0, %xmm0
49+
; SSE-NEXT: roundpd $12, %xmm1, %xmm1
50+
; SSE-NEXT: roundpd $12, %xmm2, %xmm2
51+
; SSE-NEXT: roundpd $12, %xmm3, %xmm3
52+
; SSE-NEXT: retq
53+
;
54+
; AVX1OR2-LABEL: concat_nearbyint_v8f64_v2f64:
55+
; AVX1OR2: # %bb.0:
56+
; AVX1OR2-NEXT: vroundpd $12, %xmm0, %xmm0
57+
; AVX1OR2-NEXT: vroundpd $12, %xmm1, %xmm1
58+
; AVX1OR2-NEXT: vroundpd $12, %xmm2, %xmm2
59+
; AVX1OR2-NEXT: vroundpd $12, %xmm3, %xmm3
60+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
61+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
62+
; AVX1OR2-NEXT: retq
63+
;
64+
; AVX512-LABEL: concat_nearbyint_v8f64_v2f64:
65+
; AVX512: # %bb.0:
66+
; AVX512-NEXT: vroundpd $12, %xmm0, %xmm0
67+
; AVX512-NEXT: vroundpd $12, %xmm1, %xmm1
68+
; AVX512-NEXT: vroundpd $12, %xmm2, %xmm2
69+
; AVX512-NEXT: vroundpd $12, %xmm3, %xmm3
70+
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
71+
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
72+
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
73+
; AVX512-NEXT: retq
74+
; NOTE(review): four v2f64 nearbyints are each rounded at 128 bits and the
; result is built with three inserts (AVX512) instead of wider rounding ops on
; the concatenated vectors.
%v0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a0)
75+
%v1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a1)
76+
%v2 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a2)
77+
%v3 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a3)
78+
%r01 = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
79+
%r23 = shufflevector <2 x double> %v2, <2 x double> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
80+
%res = shufflevector <4 x double> %r01, <4 x double> %r23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
81+
ret <8 x double> %res
82+
}
83+
84+
define <16 x float> @concat_nearbyint_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> %a3) {
85+
; SSE-LABEL: concat_nearbyint_v16f32_v4f32:
86+
; SSE: # %bb.0:
87+
; SSE-NEXT: roundps $12, %xmm0, %xmm0
88+
; SSE-NEXT: roundps $12, %xmm1, %xmm1
89+
; SSE-NEXT: roundps $12, %xmm2, %xmm2
90+
; SSE-NEXT: roundps $12, %xmm3, %xmm3
91+
; SSE-NEXT: retq
92+
;
93+
; AVX1OR2-LABEL: concat_nearbyint_v16f32_v4f32:
94+
; AVX1OR2: # %bb.0:
95+
; AVX1OR2-NEXT: vroundps $12, %xmm0, %xmm0
96+
; AVX1OR2-NEXT: vroundps $12, %xmm1, %xmm1
97+
; AVX1OR2-NEXT: vroundps $12, %xmm2, %xmm2
98+
; AVX1OR2-NEXT: vroundps $12, %xmm3, %xmm3
99+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
100+
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
101+
; AVX1OR2-NEXT: retq
102+
;
103+
; AVX512-LABEL: concat_nearbyint_v16f32_v4f32:
104+
; AVX512: # %bb.0:
105+
; AVX512-NEXT: vroundps $12, %xmm0, %xmm0
106+
; AVX512-NEXT: vroundps $12, %xmm1, %xmm1
107+
; AVX512-NEXT: vroundps $12, %xmm2, %xmm2
108+
; AVX512-NEXT: vroundps $12, %xmm3, %xmm3
109+
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
110+
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
111+
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
112+
; AVX512-NEXT: retq
113+
; NOTE(review): float variant of the 4-input test -- four 128-bit roundps ops
; plus three inserts rather than rounding the concatenated vector(s) once.
%v0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a0)
114+
%v1 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a1)
115+
%v2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a2)
116+
%v3 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a3)
117+
%r01 = shufflevector <4 x float> %v0, <4 x float> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
118+
%r23 = shufflevector <4 x float> %v2, <4 x float> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
119+
%res = shufflevector <8 x float> %r01, <8 x float> %r23, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
120+
ret <16 x float> %res
121+
}
122+
123+
define <8 x double> @concat_nearbyint_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1) {
124+
; SSE-LABEL: concat_nearbyint_v8f64_v4f64:
125+
; SSE: # %bb.0:
126+
; SSE-NEXT: roundpd $12, %xmm0, %xmm0
127+
; SSE-NEXT: roundpd $12, %xmm1, %xmm1
128+
; SSE-NEXT: roundpd $12, %xmm2, %xmm2
129+
; SSE-NEXT: roundpd $12, %xmm3, %xmm3
130+
; SSE-NEXT: retq
131+
;
132+
; AVX1OR2-LABEL: concat_nearbyint_v8f64_v4f64:
133+
; AVX1OR2: # %bb.0:
134+
; AVX1OR2-NEXT: vroundpd $12, %ymm0, %ymm0
135+
; AVX1OR2-NEXT: vroundpd $12, %ymm1, %ymm1
136+
; AVX1OR2-NEXT: retq
137+
;
138+
; AVX512-LABEL: concat_nearbyint_v8f64_v4f64:
139+
; AVX512: # %bb.0:
140+
; AVX512-NEXT: vroundpd $12, %ymm0, %ymm0
141+
; AVX512-NEXT: vroundpd $12, %ymm1, %ymm1
142+
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
143+
; AVX512-NEXT: retq
144+
; NOTE(review): 256-bit inputs -- AVX512 still rounds each ymm half and then
; concatenates with vinsertf64x4 instead of a single 512-bit rounding of the
; concatenated value.
%v0 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %a0)
145+
%v1 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %a1)
146+
%res = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
147+
ret <8 x double> %res
148+
}
149+
150+
define <16 x float> @concat_nearbyint_v16f32_v8f32(<8 x float> %a0, <8 x float> %a1) {
151+
; SSE-LABEL: concat_nearbyint_v16f32_v8f32:
152+
; SSE: # %bb.0:
153+
; SSE-NEXT: roundps $12, %xmm0, %xmm0
154+
; SSE-NEXT: roundps $12, %xmm1, %xmm1
155+
; SSE-NEXT: roundps $12, %xmm2, %xmm2
156+
; SSE-NEXT: roundps $12, %xmm3, %xmm3
157+
; SSE-NEXT: retq
158+
;
159+
; AVX1OR2-LABEL: concat_nearbyint_v16f32_v8f32:
160+
; AVX1OR2: # %bb.0:
161+
; AVX1OR2-NEXT: vroundps $12, %ymm0, %ymm0
162+
; AVX1OR2-NEXT: vroundps $12, %ymm1, %ymm1
163+
; AVX1OR2-NEXT: retq
164+
;
165+
; AVX512-LABEL: concat_nearbyint_v16f32_v8f32:
166+
; AVX512: # %bb.0:
167+
; AVX512-NEXT: vroundps $12, %ymm0, %ymm0
168+
; AVX512-NEXT: vroundps $12, %ymm1, %ymm1
169+
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
170+
; AVX512-NEXT: retq
171+
; NOTE(review): float variant of the 256-bit test -- AVX512 rounds per-ymm and
; concatenates rather than folding into one 512-bit rounding op.
%v0 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %a0)
172+
%v1 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %a1)
173+
%res = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
174+
ret <16 x float> %res
175+
}

0 commit comments

Comments (0)