Skip to content

Commit c915974

Browse files
sushgokh authored and tstellar committed
[LV] Reland "Update logic for calculating register usage due to invariants"
Previously, while calculating register usage due to invariants, it was assumed that an invariant would always be part of widening instructions. This resulted in calculating vector register types for vectors which can't be legalized (check the newly added test for more details). An invariant might not always need a vector register. For example, an invariant might just be used for the iteration check. This patch checks whether the invariant is part of any widening instruction and considers register usage accordingly. Fixes issue 60493. Differential Revision: https://reviews.llvm.org/D143422 (cherry picked from commit 4f9a544)
1 parent f5f4825 commit c915974

File tree

6 files changed

+60
-14
lines changed

6 files changed

+60
-14
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5953,7 +5953,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
59535953
// Saves the list of values that are used in the loop but are defined outside
59545954
// the loop (not including non-instruction values such as arguments and
59555955
// constants).
5956-
SmallPtrSet<Value *, 8> LoopInvariants;
5956+
SmallPtrSet<Instruction *, 8> LoopInvariants;
59575957

59585958
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
59595959
for (Instruction &I : BB->instructionsWithoutDebug()) {
@@ -6079,11 +6079,16 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
60796079
for (auto *Inst : LoopInvariants) {
60806080
// FIXME: The target might use more than one register for the type
60816081
// even in the scalar case.
6082-
unsigned Usage =
6083-
VFs[i].isScalar() ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
6082+
bool IsScalar = all_of(Inst->users(), [&](User *U) {
6083+
auto *I = cast<Instruction>(U);
6084+
return TheLoop != LI->getLoopFor(I->getParent()) ||
6085+
isScalarAfterVectorization(I, VFs[i]);
6086+
});
6087+
6088+
ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[i];
60846089
unsigned ClassID =
6085-
TTI.getRegisterClassForType(VFs[i].isVector(), Inst->getType());
6086-
Invariant[ClassID] += Usage;
6090+
TTI.getRegisterClassForType(VF.isVector(), Inst->getType());
6091+
Invariant[ClassID] += GetRegUsage(Inst->getType(), VF);
60876092
}
60886093

60896094
LLVM_DEBUG({
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; REQUIRES: asserts
2+
3+
; RUN: opt -mtriple arm64-linux -passes=loop-vectorize -mattr=+sve -debug-only=loop-vectorize -disable-output <%s 2>&1 | FileCheck %s
4+
5+
; Invariant register usage calculation should take into account if the
6+
; invariant would be used in widened instructions. Only in such cases, a vector
7+
; register would be required for holding the invariant. For all other cases
8+
; such as below (where usage of %0 in the loop doesn't require a vector register), a
9+
; general purpose register suffices.
10+
; Check that the test below doesn't crash while calculating register usage for
11+
; invariant %0
12+
13+
@string = internal unnamed_addr constant [5 x i8] c"abcd\00", align 1
14+
define void @get_invariant_reg_usage(ptr %z) {
15+
; CHECK: LV: Checking a loop in 'get_invariant_reg_usage'
16+
; CHECK: LV(REG): VF = vscale x 16
17+
; CHECK: LV(REG): Found max usage: 1 item
18+
; CHECK: LV(REG): RegisterClass: Generic::ScalarRC, 3 registers
19+
; CHECK: LV(REG): Found invariant usage: 2 item
20+
; CHECK: LV(REG): RegisterClass: Generic::VectorRC, 8 registers
21+
; CHECK: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
22+
23+
L.entry:
24+
%0 = load i128, ptr %z, align 16
25+
%1 = icmp slt i128 %0, 1
26+
%a = getelementptr i8, ptr %z, i64 1
27+
br i1 %1, label %return, label %loopbody
28+
29+
loopbody: ;preds = %L.entry, %loopbody
30+
%b = phi ptr [ %2, %loopbody ], [ @string, %L.entry ]
31+
%len_input = phi i128 [ %len, %loopbody ], [ %0, %L.entry ]
32+
%len = add nsw i128 %len_input, -1
33+
%2 = getelementptr i8, ptr %b, i64 1
34+
%3 = load i8, ptr %b, align 1
35+
store i8 %3, ptr %a, align 4
36+
%.not = icmp eq i128 %len, 0
37+
br i1 %.not, label %return, label %loopbody
38+
39+
return: ;preds = %loopbody, %L.entry
40+
ret void
41+
}

llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ define void @double_(ptr nocapture %A, i32 %n) nounwind uwtable ssp {
175175
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
176176
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
177177
;CHECK-PWR8: LV(REG): Found invariant usage: 1 item
178-
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 1 registers
178+
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers
179179

180180
;CHECK-PWR9: LV(REG): VF = 1
181181
;CHECK-PWR9: LV(REG): Found max usage: 2 item

llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,22 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea
3131
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
3232
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
3333
; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item
34-
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
34+
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
3535
; CHECK-LMUL2: LV(REG): Found max usage: 2 item
3636
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
3737
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
3838
; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item
39-
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
39+
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
4040
; CHECK-LMUL4: LV(REG): Found max usage: 2 item
4141
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
4242
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
4343
; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item
44-
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
44+
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
4545
; CHECK-LMUL8: LV(REG): Found max usage: 2 item
4646
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
4747
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
4848
; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item
49-
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
49+
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
5050

5151
entry:
5252
%conv = zext i32 %size to i64

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
102102
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
103103
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
104104
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
105-
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
105+
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
106106
; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class
107107
; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class
108108
; CHECK-NEXT: LV: Loop cost is 23
@@ -234,7 +234,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
234234
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
235235
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
236236
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
237-
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
237+
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
238238
; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class
239239
; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class
240240
; CHECK-NEXT: LV: Loop cost is 23

llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ target triple = "x86_64-unknown-linux-gnu"
2626
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
2727
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
2828
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
29-
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
29+
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers
3030

3131
define i32 @test_g(ptr nocapture readonly %a, i32 %n) local_unnamed_addr !dbg !6 {
3232
entry:
@@ -68,7 +68,7 @@ for.end: ; preds = %for.end.loopexit, %
6868
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
6969
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
7070
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
71-
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
71+
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers
7272

7373
define i32 @test(ptr nocapture readonly %a, i32 %n) local_unnamed_addr {
7474
entry:

0 commit comments

Comments
 (0)