Skip to content

Commit ea1a38d

Browse files
RKSimon and kcloudy0717
authored and committed
[X86] combineConcatVectorOps - add handling to concat ISD::FROUND/FFLOOR intrinsics together (llvm#170176)
These were missed in llvm#170160
1 parent bece5e0 commit ea1a38d

File tree

3 files changed

+148
-137
lines changed

3 files changed

+148
-137
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59463,7 +59463,9 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5946359463
case ISD::FTRUNC:
5946459464
case ISD::FRINT:
5946559465
case ISD::FNEARBYINT:
59466+
case ISD::FROUND:
5946659467
case ISD::FROUNDEVEN:
59468+
case ISD::FFLOOR:
5946759469
if (!IsSplat && (VT.is256BitVector() ||
5946859470
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
5946959471
return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0));

llvm/test/CodeGen/X86/combine-ffloor.ll

Lines changed: 52 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ define <4 x double> @concat_floor_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1
1313
;
1414
; AVX-LABEL: concat_floor_v4f64_v2f64:
1515
; AVX: # %bb.0:
16-
; AVX-NEXT: vroundpd $9, %xmm0, %xmm0
17-
; AVX-NEXT: vroundpd $9, %xmm1, %xmm1
16+
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1817
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
18+
; AVX-NEXT: vroundpd $9, %ymm0, %ymm0
1919
; AVX-NEXT: retq
2020
%v0 = call <2 x double> @llvm.floor.v2f64(<2 x double> %a0)
2121
%v1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %a1)
@@ -32,9 +32,9 @@ define <8 x float> @concat_floor_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
3232
;
3333
; AVX-LABEL: concat_floor_v8f32_v4f32:
3434
; AVX: # %bb.0:
35-
; AVX-NEXT: vroundps $9, %xmm0, %xmm0
36-
; AVX-NEXT: vroundps $9, %xmm1, %xmm1
35+
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
3736
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
37+
; AVX-NEXT: vroundps $9, %ymm0, %ymm0
3838
; AVX-NEXT: retq
3939
%v0 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a0)
4040
%v1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a1)
@@ -51,25 +51,34 @@ define <8 x double> @concat_floor_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1
5151
; SSE-NEXT: roundpd $9, %xmm3, %xmm3
5252
; SSE-NEXT: retq
5353
;
54-
; AVX1OR2-LABEL: concat_floor_v8f64_v2f64:
55-
; AVX1OR2: # %bb.0:
56-
; AVX1OR2-NEXT: vroundpd $9, %xmm0, %xmm0
57-
; AVX1OR2-NEXT: vroundpd $9, %xmm1, %xmm1
58-
; AVX1OR2-NEXT: vroundpd $9, %xmm2, %xmm2
59-
; AVX1OR2-NEXT: vroundpd $9, %xmm3, %xmm3
60-
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
61-
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
62-
; AVX1OR2-NEXT: retq
54+
; AVX1-LABEL: concat_floor_v8f64_v2f64:
55+
; AVX1: # %bb.0:
56+
; AVX1-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
57+
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
58+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
59+
; AVX1-NEXT: vroundpd $9, %ymm0, %ymm0
60+
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
61+
; AVX1-NEXT: vroundpd $9, %ymm1, %ymm1
62+
; AVX1-NEXT: retq
63+
;
64+
; AVX2-LABEL: concat_floor_v8f64_v2f64:
65+
; AVX2: # %bb.0:
66+
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
67+
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
68+
; AVX2-NEXT: vroundpd $9, %ymm0, %ymm0
69+
; AVX2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
70+
; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
71+
; AVX2-NEXT: vroundpd $9, %ymm1, %ymm1
72+
; AVX2-NEXT: retq
6373
;
6474
; AVX512-LABEL: concat_floor_v8f64_v2f64:
6575
; AVX512: # %bb.0:
66-
; AVX512-NEXT: vroundpd $9, %xmm0, %xmm0
67-
; AVX512-NEXT: vroundpd $9, %xmm1, %xmm1
68-
; AVX512-NEXT: vroundpd $9, %xmm2, %xmm2
69-
; AVX512-NEXT: vroundpd $9, %xmm3, %xmm3
76+
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
77+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
7078
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
7179
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
7280
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
81+
; AVX512-NEXT: vrndscalepd $9, %zmm0, %zmm0
7382
; AVX512-NEXT: retq
7483
%v0 = call <2 x double> @llvm.floor.v2f64(<2 x double> %a0)
7584
%v1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %a1)
@@ -90,25 +99,34 @@ define <16 x float> @concat_floor_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1,
9099
; SSE-NEXT: roundps $9, %xmm3, %xmm3
91100
; SSE-NEXT: retq
92101
;
93-
; AVX1OR2-LABEL: concat_floor_v16f32_v4f32:
94-
; AVX1OR2: # %bb.0:
95-
; AVX1OR2-NEXT: vroundps $9, %xmm0, %xmm0
96-
; AVX1OR2-NEXT: vroundps $9, %xmm1, %xmm1
97-
; AVX1OR2-NEXT: vroundps $9, %xmm2, %xmm2
98-
; AVX1OR2-NEXT: vroundps $9, %xmm3, %xmm3
99-
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
100-
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
101-
; AVX1OR2-NEXT: retq
102+
; AVX1-LABEL: concat_floor_v16f32_v4f32:
103+
; AVX1: # %bb.0:
104+
; AVX1-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
105+
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
106+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
107+
; AVX1-NEXT: vroundps $9, %ymm0, %ymm0
108+
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
109+
; AVX1-NEXT: vroundps $9, %ymm1, %ymm1
110+
; AVX1-NEXT: retq
111+
;
112+
; AVX2-LABEL: concat_floor_v16f32_v4f32:
113+
; AVX2: # %bb.0:
114+
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
115+
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
116+
; AVX2-NEXT: vroundps $9, %ymm0, %ymm0
117+
; AVX2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
118+
; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
119+
; AVX2-NEXT: vroundps $9, %ymm1, %ymm1
120+
; AVX2-NEXT: retq
102121
;
103122
; AVX512-LABEL: concat_floor_v16f32_v4f32:
104123
; AVX512: # %bb.0:
105-
; AVX512-NEXT: vroundps $9, %xmm0, %xmm0
106-
; AVX512-NEXT: vroundps $9, %xmm1, %xmm1
107-
; AVX512-NEXT: vroundps $9, %xmm2, %xmm2
108-
; AVX512-NEXT: vroundps $9, %xmm3, %xmm3
124+
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
125+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
109126
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
110127
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
111128
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
129+
; AVX512-NEXT: vrndscaleps $9, %zmm0, %zmm0
112130
; AVX512-NEXT: retq
113131
%v0 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a0)
114132
%v1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a1)
@@ -137,9 +155,9 @@ define <8 x double> @concat_floor_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1
137155
;
138156
; AVX512-LABEL: concat_floor_v8f64_v4f64:
139157
; AVX512: # %bb.0:
140-
; AVX512-NEXT: vroundpd $9, %ymm0, %ymm0
141-
; AVX512-NEXT: vroundpd $9, %ymm1, %ymm1
158+
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
142159
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
160+
; AVX512-NEXT: vrndscalepd $9, %zmm0, %zmm0
143161
; AVX512-NEXT: retq
144162
%v0 = call <4 x double> @llvm.floor.v4f64(<4 x double> %a0)
145163
%v1 = call <4 x double> @llvm.floor.v4f64(<4 x double> %a1)
@@ -164,15 +182,12 @@ define <16 x float> @concat_floor_v16f32_v8f32(<8 x float> %a0, <8 x float> %a1)
164182
;
165183
; AVX512-LABEL: concat_floor_v16f32_v8f32:
166184
; AVX512: # %bb.0:
167-
; AVX512-NEXT: vroundps $9, %ymm0, %ymm0
168-
; AVX512-NEXT: vroundps $9, %ymm1, %ymm1
185+
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
169186
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
187+
; AVX512-NEXT: vrndscaleps $9, %zmm0, %zmm0
170188
; AVX512-NEXT: retq
171189
%v0 = call <8 x float> @llvm.floor.v8f32(<8 x float> %a0)
172190
%v1 = call <8 x float> @llvm.floor.v8f32(<8 x float> %a1)
173191
%res = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
174192
ret <16 x float> %res
175193
}
176-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
177-
; AVX1: {{.*}}
178-
; AVX2: {{.*}}

0 commit comments

Comments (0)