Skip to content

Commit 1bfc610

Browse files
committed
[SLP]Fix spill cost analysis for split vectorized nodes
If the entry is SplitVectorize, it can be skipped in favor of its operands; the operands allow the spill costs to be detected correctly. Fixes #133288
1 parent 77ba691 commit 1bfc610

File tree

2 files changed

+84
-1
lines changed

2 files changed

+84
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13347,7 +13347,8 @@ InstructionCost BoUpSLP::getSpillCost() {
1334713347
for (const TreeEntry *Op : Operands) {
1334813348
if (!Op->isGather())
1334913349
LiveEntries.push_back(Op);
13350-
if ((Entry->getOpcode() != Instruction::PHI && Op->isGather()) ||
13350+
if (Entry->State == TreeEntry::SplitVectorize ||
13351+
(Entry->getOpcode() != Instruction::PHI && Op->isGather()) ||
1335113352
(Op->isGather() && allConstant(Op->Scalars)))
1335213353
continue;
1335313354
Budget = 0;
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 -mattr=+sse4.1 < %s | FileCheck %s
3+
4+
define void @test(i32 %arg) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[BB:.*:]]
8+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(3) null, align 4
9+
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr addrspace(3) null, align 4
10+
; CHECK-NEXT: [[LOAD2:%.*]] = load i32, ptr addrspace(3) null, align 4
11+
; CHECK-NEXT: [[LOAD3:%.*]] = load i32, ptr addrspace(3) null, align 4
12+
; CHECK-NEXT: br label %[[BB4:.*]]
13+
; CHECK: [[BB4]]:
14+
; CHECK-NEXT: switch i32 0, label %[[BB8:.*]] [
15+
; CHECK-NEXT: i32 0, label %[[BB7:.*]]
16+
; CHECK-NEXT: i32 1, label %[[BB21:.*]]
17+
; CHECK-NEXT: ]
18+
; CHECK: [[BB5:.*:]]
19+
; CHECK-NEXT: br label %[[BB21]]
20+
; CHECK: [[BB6:.*]]:
21+
; CHECK-NEXT: br label %[[BB12:.*]]
22+
; CHECK: [[BB7]]:
23+
; CHECK-NEXT: ret void
24+
; CHECK: [[BB8]]:
25+
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ zeroinitializer, %[[BB4]] ]
26+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[LOAD]], i32 0
27+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[LOAD1]], i32 1
28+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[LOAD3]], i32 2
29+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[LOAD2]], i32 3
30+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
31+
; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP5]], <4 x i32> [[TMP4]], i64 4)
32+
; CHECK-NEXT: br label %[[BB12]]
33+
; CHECK: [[BB12]]:
34+
; CHECK-NEXT: [[TMP7:%.*]] = phi <8 x i32> [ [[TMP6]], %[[BB8]] ], [ poison, %[[BB6]] ]
35+
; CHECK-NEXT: ret void
36+
; CHECK: [[BB21]]:
37+
; CHECK-NEXT: ret void
38+
;
39+
bb:
40+
%load = load i32, ptr addrspace(3) null, align 4
41+
%load1 = load i32, ptr addrspace(3) null, align 4
42+
%load2 = load i32, ptr addrspace(3) null, align 4
43+
%load3 = load i32, ptr addrspace(3) null, align 4
44+
br label %bb4
45+
46+
bb4:
47+
switch i32 0, label %bb8 [
48+
i32 0, label %bb7
49+
i32 1, label %bb21
50+
]
51+
52+
bb5:
53+
%srem = srem i32 0, 0
54+
br label %bb21
55+
56+
bb6:
57+
br label %bb12
58+
59+
bb7:
60+
ret void
61+
62+
bb8:
63+
%phi = phi i32 [ 0, %bb4 ]
64+
%phi9 = phi i32 [ 0, %bb4 ]
65+
%phi10 = phi i32 [ 0, %bb4 ]
66+
%phi11 = phi i32 [ 0, %bb4 ]
67+
br label %bb12
68+
69+
bb12:
70+
%phi13 = phi i32 [ %load, %bb8 ], [ 0, %bb6 ]
71+
%phi14 = phi i32 [ %load1, %bb8 ], [ 0, %bb6 ]
72+
%phi15 = phi i32 [ %load2, %bb8 ], [ %arg, %bb6 ]
73+
%phi16 = phi i32 [ %load3, %bb8 ], [ 0, %bb6 ]
74+
%phi17 = phi i32 [ %phi, %bb8 ], [ %srem, %bb6 ]
75+
%phi18 = phi i32 [ %phi11, %bb8 ], [ 0, %bb6 ]
76+
%phi19 = phi i32 [ %phi9, %bb8 ], [ 0, %bb6 ]
77+
%phi20 = phi i32 [ %phi10, %bb8 ], [ 0, %bb6 ]
78+
ret void
79+
80+
bb21:
81+
ret void
82+
}

0 commit comments

Comments
 (0)