Skip to content

Commit c4051b2

Browse files
committed
[X86] Fold vbroadcast(bitcast(vbroadcast(src))) -> bitcast(vbroadcast(vbroadcast(src)))
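If the inner broadcast's scalar type is no wider than the outer broadcast's scalar type (and its width evenly divides the outer one), then we can re-broadcast using the inner scalar type directly and bitcast the result back to the original type. This also works when the inner node is a vbroadcast_load.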
1 parent a3be778 commit c4051b2

File tree

3 files changed

+18
-4
lines changed

3 files changed

+18
-4
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40291,6 +40291,21 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
4029140291
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
4029240292
}
4029340293

40294+
// vbroadcast(bitcast(vbroadcast(src))) -> bitcast(vbroadcast(vbroadcast(src)))
40295+
// If we're re-broadcasting a smaller type then broadcast with that type and
40296+
// bitcast.
40297+
// TODO: Do this for any splat?
40298+
if (Src.getOpcode() == ISD::BITCAST &&
40299+
(BC.getOpcode() == X86ISD::VBROADCAST ||
40300+
BC.getOpcode() == X86ISD::VBROADCAST_LOAD) &&
40301+
(VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits()) == 0 &&
40302+
(VT.getSizeInBits() % BCVT.getSizeInBits()) == 0) {
40303+
MVT NewVT =
40304+
MVT::getVectorVT(BCVT.getSimpleVT().getScalarType(),
40305+
VT.getSizeInBits() / BCVT.getScalarSizeInBits());
40306+
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
40307+
}
40308+
4029440309
// Reduce broadcast source vector to lowest 128-bits.
4029540310
if (SrcVT.getSizeInBits() > 128)
4029640311
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,

llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ define void @endless_loop() {
2222
; AVX2-NEXT: vbroadcastss (%eax), %xmm0
2323
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
2424
; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
25-
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
25+
; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
2626
; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
2727
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3,4,5,6],ymm0[7]
2828
; AVX2-NEXT: vmovaps %ymm0, (%eax)

llvm/test/CodeGen/X86/combine-concatvectors.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,8 @@ define void @concat_of_broadcast_v2f64_v4f64() {
6262
; AVX2-NEXT: movl $1091567616, 30256(%rax) # imm = 0x41100000
6363
; AVX2-NEXT: movabsq $4294967297, %rcx # imm = 0x100000001
6464
; AVX2-NEXT: movq %rcx, 46348(%rax)
65-
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
66-
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm1
67-
; AVX2-NEXT: vmovups %ymm1, 48296(%rax)
65+
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
66+
; AVX2-NEXT: vmovups %ymm0, 48296(%rax)
6867
; AVX2-NEXT: vmovlps %xmm0, 47372(%rax)
6968
; AVX2-NEXT: vzeroupper
7069
; AVX2-NEXT: retq

0 commit comments

Comments
 (0)