Skip to content

Commit 76bfc78

Browse files
committed
[SDAG] Match BUILD_VECTOR in INSERT_SUBVECTOR to SPLAT_VECTOR fold
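Extend the `insert_subvector undef, (splat X), N2 -> splat X` fold so that it also matches a BUILD_VECTOR subvector whose elements form a splat. This allows for more constant folding when inserting fixed-length vector splats into scalable vectors.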
1 parent 1e605fc commit 76bfc78

File tree

4 files changed

+12
-17
lines changed

4 files changed

+12
-17
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28014,9 +28014,13 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
2801428014

2801528015
// Simplify scalar inserts into an undef vector:
2801628016
// insert_subvector undef, (splat X), N2 -> splat X
28017-
if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
28018-
if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
28017+
auto *BV0 = dyn_cast<BuildVectorSDNode>(N1);
28018+
if (N0.isUndef() && (N1.getOpcode() == ISD::SPLAT_VECTOR || BV0)) {
28019+
SDValue Splat = BV0 ? BV0->getSplatValue() : N1.getOperand(0);
28020+
if (Splat &&
28021+
(N1.hasOneUse() || (!BV0 && DAG.isConstantValueOfAnyType(Splat))))
2801928022
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
28023+
}
2802028024

2802128025
// insert_subvector (splat X), (splat X), N2 -> splat X
2802228026
if (N0.getOpcode() == ISD::SPLAT_VECTOR && N0.getOpcode() == N1.getOpcode() &&

llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,7 @@
44
define <vscale x 4 x i32> @insert_div() {
55
; CHECK-LABEL: insert_div:
66
; CHECK: // %bb.0: // %entry
7-
; CHECK-NEXT: mov w8, #43691 // =0xaaab
8-
; CHECK-NEXT: movi v0.4s, #9
9-
; CHECK-NEXT: ptrue p0.s
10-
; CHECK-NEXT: movk w8, #43690, lsl #16
11-
; CHECK-NEXT: mov z1.s, w8
12-
; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
13-
; CHECK-NEXT: lsr z0.s, z0.s, #1
7+
; CHECK-NEXT: mov z0.s, #3 // =0x3
148
; CHECK-NEXT: ret
159
entry:
1610
%0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 9), i64 0)
@@ -21,8 +15,7 @@ entry:
2115
define <vscale x 4 x i32> @insert_mul() {
2216
; CHECK-LABEL: insert_mul:
2317
; CHECK: // %bb.0: // %entry
24-
; CHECK-NEXT: movi v0.4s, #1
25-
; CHECK-NEXT: mul z0.s, z0.s, #7
18+
; CHECK-NEXT: mov z0.s, #7 // =0x7
2619
; CHECK-NEXT: ret
2720
entry:
2821
%0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 1), i64 0)
@@ -33,8 +26,7 @@ entry:
3326
define <vscale x 4 x i32> @insert_add() {
3427
; CHECK-LABEL: insert_add:
3528
; CHECK: // %bb.0: // %entry
36-
; CHECK-NEXT: movi v0.4s, #5
37-
; CHECK-NEXT: add z0.s, z0.s, #11 // =0xb
29+
; CHECK-NEXT: mov z0.s, #16 // =0x10
3830
; CHECK-NEXT: ret
3931
entry:
4032
%0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 5), i64 0)
@@ -45,8 +37,7 @@ entry:
4537
define <vscale x 4 x i32> @insert_sub() {
4638
; CHECK-LABEL: insert_sub:
4739
; CHECK: // %bb.0: // %entry
48-
; CHECK-NEXT: movi v0.4s, #11
49-
; CHECK-NEXT: sub z0.s, z0.s, #11 // =0xb
40+
; CHECK-NEXT: movi v0.2d, #0000000000000000
5041
; CHECK-NEXT: ret
5142
entry:
5243
%0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 11), i64 0)

llvm/test/CodeGen/AArch64/vecreduce-add.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4778,7 +4778,7 @@ entry:
47784778
define i64 @extract_scalable(<2 x i32> %0) "target-features"="+sve2" {
47794779
; CHECK-SD-LABEL: extract_scalable:
47804780
; CHECK-SD: // %bb.0:
4781-
; CHECK-SD-NEXT: movi v1.2s, #1
4781+
; CHECK-SD-NEXT: mov z1.s, #1 // =0x1
47824782
; CHECK-SD-NEXT: ptrue p0.s, vl2
47834783
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $z0
47844784
; CHECK-SD-NEXT: sdivr z0.s, p0/m, z0.s, z1.s

llvm/test/CodeGen/X86/pr35443.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
define void @pr35443() {
99
; CHECK-LABEL: pr35443:
1010
; CHECK: # %bb.0: # %entry
11-
; CHECK-NEXT: vpbroadcastb ac+4(%rip), %xmm0
11+
; CHECK-NEXT: vpbroadcastb ac+4(%rip), %ymm0
1212
; CHECK-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1313
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
1414
; CHECK-NEXT: vpsubq %ymm0, %ymm1, %ymm0

0 commit comments

Comments
 (0)