Skip to content

Commit b76cada

Browse files
authored
[X86] combineConcatVectorOps - add handling to concat RCPPS/RSQRTPS intrinsics together (#170148)
Limited to the 128-bit -> 256-bit case, as we can't safely convert to the RCP14/RSQRT14 variants that 512-bit vectors would require.
1 parent 37858b0 commit b76cada

File tree

3 files changed

+30
-24
lines changed

3 files changed

+30
-24
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59464,6 +59464,12 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5946459464
return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0));
5946559465
}
5946659466
break;
59467+
case X86ISD::FRCP:
59468+
case X86ISD::FRSQRT:
59469+
if (!IsSplat && VT.is256BitVector()) {
59470+
return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0));
59471+
}
59472+
break;
5946759473
case X86ISD::HADD:
5946859474
case X86ISD::HSUB:
5946959475
case X86ISD::FHADD:

llvm/test/CodeGen/X86/combine-rcp.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ define <8 x float> @concat_rcp_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
1414
;
1515
; AVX-LABEL: concat_rcp_v8f32_v4f32:
1616
; AVX: # %bb.0:
17-
; AVX-NEXT: vrcpps %xmm0, %xmm0
18-
; AVX-NEXT: vrcpps %xmm1, %xmm1
17+
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1918
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
19+
; AVX-NEXT: vrcpps %ymm0, %ymm0
2020
; AVX-NEXT: retq
2121
%v0 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
2222
%v1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a1)
@@ -36,23 +36,23 @@ define <16 x float> @concat_rcp_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1, <
3636
;
3737
; AVX1OR2-LABEL: concat_rcp_v16f32_v4f32:
3838
; AVX1OR2: # %bb.0:
39-
; AVX1OR2-NEXT: vrcpps %xmm0, %xmm0
40-
; AVX1OR2-NEXT: vrcpps %xmm1, %xmm1
41-
; AVX1OR2-NEXT: vrcpps %xmm2, %xmm2
42-
; AVX1OR2-NEXT: vrcpps %xmm3, %xmm3
39+
; AVX1OR2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
40+
; AVX1OR2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
4341
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
42+
; AVX1OR2-NEXT: vrcpps %ymm0, %ymm0
4443
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
44+
; AVX1OR2-NEXT: vrcpps %ymm1, %ymm1
4545
; AVX1OR2-NEXT: retq
4646
;
4747
; AVX512-LABEL: concat_rcp_v16f32_v4f32:
4848
; AVX512: # %bb.0:
49-
; AVX512-NEXT: vrcpps %xmm0, %xmm0
50-
; AVX512-NEXT: vrcpps %xmm1, %xmm1
51-
; AVX512-NEXT: vrcpps %xmm2, %xmm2
52-
; AVX512-NEXT: vrcpps %xmm3, %xmm3
53-
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
49+
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
50+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
5451
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
55-
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
52+
; AVX512-NEXT: vrcpps %ymm0, %ymm0
53+
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
54+
; AVX512-NEXT: vrcpps %ymm1, %ymm1
55+
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
5656
; AVX512-NEXT: retq
5757
%v0 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
5858
%v1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a1)

llvm/test/CodeGen/X86/combine-rsqrt.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ define <8 x float> @concat_rsqrt_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
1414
;
1515
; AVX-LABEL: concat_rsqrt_v8f32_v4f32:
1616
; AVX: # %bb.0:
17-
; AVX-NEXT: vrsqrtps %xmm0, %xmm0
18-
; AVX-NEXT: vrsqrtps %xmm1, %xmm1
17+
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1918
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
19+
; AVX-NEXT: vrsqrtps %ymm0, %ymm0
2020
; AVX-NEXT: retq
2121
%v0 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
2222
%v1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a1)
@@ -36,23 +36,23 @@ define <16 x float> @concat_rsqrt_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1,
3636
;
3737
; AVX1OR2-LABEL: concat_rsqrt_v16f32_v4f32:
3838
; AVX1OR2: # %bb.0:
39-
; AVX1OR2-NEXT: vrsqrtps %xmm0, %xmm0
40-
; AVX1OR2-NEXT: vrsqrtps %xmm1, %xmm1
41-
; AVX1OR2-NEXT: vrsqrtps %xmm2, %xmm2
42-
; AVX1OR2-NEXT: vrsqrtps %xmm3, %xmm3
39+
; AVX1OR2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
40+
; AVX1OR2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
4341
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
42+
; AVX1OR2-NEXT: vrsqrtps %ymm0, %ymm0
4443
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
44+
; AVX1OR2-NEXT: vrsqrtps %ymm1, %ymm1
4545
; AVX1OR2-NEXT: retq
4646
;
4747
; AVX512-LABEL: concat_rsqrt_v16f32_v4f32:
4848
; AVX512: # %bb.0:
49-
; AVX512-NEXT: vrsqrtps %xmm0, %xmm0
50-
; AVX512-NEXT: vrsqrtps %xmm1, %xmm1
51-
; AVX512-NEXT: vrsqrtps %xmm2, %xmm2
52-
; AVX512-NEXT: vrsqrtps %xmm3, %xmm3
53-
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
49+
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
50+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
5451
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
55-
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
52+
; AVX512-NEXT: vrsqrtps %ymm0, %ymm0
53+
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
54+
; AVX512-NEXT: vrsqrtps %ymm1, %ymm1
55+
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
5656
; AVX512-NEXT: retq
5757
%v0 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
5858
%v1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a1)

0 commit comments

Comments
 (0)