Skip to content

Commit d611a9c

Browse files
authored
[LV][VPlan] Reduce register usage of VPEVLBasedIVPHIRecipe. (#154482)
`VPEVLBasedIVPHIRecipe` is lowered to a scalar-phi `VPInstruction`, which in turn generates a scalar phi. The recipe therefore occupies only a scalar register, just like other phi recipes. This patch fixes the register usage computed for `VPEVLBasedIVPHIRecipe`, changing it from vector to scalar so that it is closer to the generated vector IR. https://godbolt.org/z/6Mzd6W6ha shows that there are no register spills when `<vscale x 16>` is chosen. Note that the test is essentially copied from the AArch64 test.
1 parent cf0e861 commit d611a9c

File tree

2 files changed

+34
-1
lines changed

2 files changed

+34
-1
lines changed

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
555555

556556
if (VFs[J].isScalar() ||
557557
isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
558-
VPScalarIVStepsRecipe>(R) ||
558+
VPEVLBasedIVPHIRecipe, VPScalarIVStepsRecipe>(R) ||
559559
(isa<VPInstruction>(R) &&
560560
vputils::onlyScalarValuesUsed(cast<VPSingleDefRecipe>(R))) ||
561561
(isa<VPReductionPHIRecipe>(R) &&
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
; REQUIRES: asserts
2+
; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize,vplan -disable-output -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-REGS-VP
3+
4+
; Dot-product-style reduction used as the loop under test.
; Each iteration loads one i8 from %a and one i8 from %b at byte offset %iv,
; zero-extends both to i32, multiplies them, and adds the negated product
; (0 - %mul) to the running i32 accumulator %accum. The loop leaves once
; %iv.next equals 1024 and the function returns the final %add.
; The FileCheck directives inside the function pin the register-usage report
; printed for VF = vscale x 16 and the VF that is ultimately selected.
define i32 @dotp(ptr %a, ptr %b) {
5+
; CHECK-REGS-VP: LV(REG): VF = vscale x 16
6+
; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item
7+
; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
8+
; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 24 registers
9+
; CHECK-REGS-VP-NEXT: LV(REG): Found invariant usage: 1 item
10+
; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
11+
; CHECK-REGS-VP: LV: Selecting VF: vscale x 16.
12+
entry:
13+
br label %for.body
14+
15+
for.body: ; preds = %for.body, %entry
16+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
17+
%accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
18+
%gep.a = getelementptr i8, ptr %a, i64 %iv
19+
%load.a = load i8, ptr %gep.a, align 1
20+
%ext.a = zext i8 %load.a to i32
21+
%gep.b = getelementptr i8, ptr %b, i64 %iv
22+
%load.b = load i8, ptr %gep.b, align 1
23+
%ext.b = zext i8 %load.b to i32
24+
%mul = mul i32 %ext.b, %ext.a
25+
; Negate the widened product before it is accumulated.
%sub = sub i32 0, %mul
26+
%add = add i32 %accum, %sub
27+
%iv.next = add i64 %iv, 1
28+
%exitcond.not = icmp eq i64 %iv.next, 1024
29+
br i1 %exitcond.not, label %for.exit, label %for.body
30+
31+
for.exit: ; preds = %for.body
32+
ret i32 %add
33+
}

0 commit comments

Comments
 (0)