Skip to content

Commit f8d0c35

Browse files
committed
[SLP] Prefer instructions used outside the block as the initial main copyable instructions
Instructions used outside the block must be considered the first choice for the main instructions in the copyable nodes, to avoid use-before-def. Fixes #171055
1 parent 0e92beb commit f8d0c35

File tree

3 files changed

+52
-5
lines changed

3 files changed

+52
-5
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10930,13 +10930,21 @@ class InstructionsCompatibilityAnalysis {
1093010930
}
1093110931
unsigned BestOpcodeNum = 0;
1093210932
MainOp = nullptr;
10933+
bool UsedOutside = false;
1093310934
for (const auto &P : Candidates) {
10935+
bool PUsedOutside = all_of(P.second, isUsedOutsideBlock);
10936+
if (UsedOutside && !PUsedOutside)
10937+
continue;
10938+
if (!UsedOutside && PUsedOutside)
10939+
BestOpcodeNum = 0;
1093410940
if (P.second.size() < BestOpcodeNum)
1093510941
continue;
1093610942
// If have inner dependencies - skip.
10937-
if (any_of(P.second,
10938-
[&](Instruction *I) { return Operands.contains(I); }))
10943+
if (!PUsedOutside && any_of(P.second, [&](Instruction *I) {
10944+
return Operands.contains(I);
10945+
}))
1093910946
continue;
10947+
UsedOutside = PUsedOutside;
1094010948
for (Instruction *I : P.second) {
1094110949
if (IsSupportedInstruction(I, AnyUndef)) {
1094210950
MainOp = I;

llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@ define <4 x i32> @test() {
88
; CHECK-NEXT: br label %[[BB1:.*]]
99
; CHECK: [[BB1]]:
1010
; CHECK-NEXT: [[OR:%.*]] = or i32 [[TRUNC]], 0
11-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[TRUNC]], i32 0
12-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 0, i32 1
11+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>, i32 [[TRUNC]], i32 0
1312
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
14-
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
13+
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> <i32 -1, i32 0, i32 0, i32 0>, [[TMP2]]
1514
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[OR]] to i64
1615
; CHECK-NEXT: br label %[[BB3:.*]]
1716
; CHECK: [[BB3]]:
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
3+
4+
define void @test(i32 %arg) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: i32 [[ARG:%.*]]) {
7+
; CHECK-NEXT: [[BB:.*:]]
8+
; CHECK-NEXT: br label %[[BB1:.*]]
9+
; CHECK: [[BB1]]:
10+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[ARG]], i32 2
11+
; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> <i32 7, i32 7, i32 0, i32 7>, [[TMP0]]
12+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
13+
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i32 [[TMP2]], 0
14+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
15+
; CHECK-NEXT: br i1 false, label %[[BB2:.*]], label %[[BB3:.*]]
16+
; CHECK: [[BB2]]:
17+
; CHECK-NEXT: br label %[[BB3]]
18+
; CHECK: [[BB3]]:
19+
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i32> [ [[TMP1]], %[[BB2]] ], [ [[TMP3]], %[[BB1]] ]
20+
; CHECK-NEXT: ret void
21+
;
22+
bb:
23+
br label %bb1
24+
25+
bb1:
26+
%sub = sub i32 0, %arg
27+
%add = add i32 7, 0
28+
%icmp = icmp ult i32 %add, 0
29+
br i1 false, label %bb2, label %bb3
30+
31+
bb2:
32+
br label %bb3
33+
34+
bb3:
35+
%phi = phi i32 [ 7, %bb2 ], [ 0, %bb1 ]
36+
%phi4 = phi i32 [ %sub, %bb2 ], [ %sub, %bb1 ]
37+
%phi5 = phi i32 [ %add, %bb2 ], [ %add, %bb1 ]
38+
%phi6 = phi i32 [ %add, %bb2 ], [ 0, %bb1 ]
39+
ret void
40+
}

0 commit comments

Comments
 (0)