Skip to content

Commit 51c2efc

Browse files
committed
[X86][AVX] Fold vt1 concat_vectors(vt2 undef, vt2 broadcast(x)) --> vt1 broadcast(x)
If we're not inserting the broadcast into the lowest subvector, then we can avoid the insertion by just performing a larger broadcast. Avoids a regression when we enable AVX1 broadcasts in shuffle combining. llvm-svn: 352742
1 parent 9b12742 commit 51c2efc

File tree

2 files changed

+9
-9
lines changed

2 files changed

+9
-9
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41593,6 +41593,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
4159341593

4159441594
// If this is subv_broadcast insert into both halves, use a larger
4159541595
// subv_broadcast.
41596+
// TODO - handle X86ISD::VBROADCAST as well?
4159641597
if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2)
4159741598
return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
4159841599
SubVec.getOperand(0));
@@ -41614,11 +41615,14 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
4161441615
SubVec2, Vec.getOperand(2));
4161541616
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec,
4161641617
N->getOperand(2));
41617-
4161841618
}
4161941619
}
4162041620
}
4162141621

41622+
// If this is a broadcast insert into an upper undef, use a larger broadcast.
41623+
if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST)
41624+
return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0));
41625+
4162241626
return SDValue();
4162341627
}
4162441628

llvm/test/CodeGen/X86/insert-into-constant-vector.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -289,31 +289,27 @@ define <8 x i32> @elt7_v8i32(i32 %x) {
289289
;
290290
; X32AVX2-LABEL: elt7_v8i32:
291291
; X32AVX2: # %bb.0:
292-
; X32AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
293-
; X32AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
292+
; X32AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
294293
; X32AVX2-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
295294
; X32AVX2-NEXT: retl
296295
;
297296
; X64AVX2-LABEL: elt7_v8i32:
298297
; X64AVX2: # %bb.0:
299298
; X64AVX2-NEXT: vmovd %edi, %xmm0
300-
; X64AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
301-
; X64AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
299+
; X64AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
302300
; X64AVX2-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
303301
; X64AVX2-NEXT: retq
304302
;
305303
; X32AVX512F-LABEL: elt7_v8i32:
306304
; X32AVX512F: # %bb.0:
307-
; X32AVX512F-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
308-
; X32AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
305+
; X32AVX512F-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
309306
; X32AVX512F-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
310307
; X32AVX512F-NEXT: retl
311308
;
312309
; X64AVX512F-LABEL: elt7_v8i32:
313310
; X64AVX512F: # %bb.0:
314311
; X64AVX512F-NEXT: vmovd %edi, %xmm0
315-
; X64AVX512F-NEXT: vpbroadcastd %xmm0, %xmm0
316-
; X64AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
312+
; X64AVX512F-NEXT: vpbroadcastd %xmm0, %ymm0
317313
; X64AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
318314
; X64AVX512F-NEXT: retq
319315
%ins = insertelement <8 x i32> <i32 42, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %x, i32 7

0 commit comments

Comments
 (0)