From 3990d82c8e6eff13cfebf58832c2e4f9b0be9617 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Tue, 19 Aug 2025 23:19:59 -0700 Subject: [PATCH 1/4] Precommit test case. --- .../RISCV/maxbandwidth-regpressure.ll | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll b/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll new file mode 100644 index 0000000000000..af4d76d853bfd --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll @@ -0,0 +1,38 @@ +; REQUIRES: asserts +; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize,vplan -disable-output -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-REGS-VP +; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize -disable-output -force-target-num-vector-regs=1 -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NOREGS-VP +define i32 @dotp(ptr %a, ptr %b) { +; CHECK-REGS-VP: LV(REG): VF = vscale x 16 +; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item +; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers +; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 40 registers +; CHECK-REGS-VP-NEXT: LV(REG): Found invariant usage: 1 item +; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers +; CHECK-REGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers +; CHECK-REGS-VP: LV: Selecting VF: vscale x 8. 
+; +; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 8 because it uses too many registers +; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers +; CHECK-NOREGS-VP: LV: Selecting VF: vscale x 4. +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %accum = phi i32 [ 0, %entry ], [ %add, %for.body ] + %gep.a = getelementptr i8, ptr %a, i64 %iv + %load.a = load i8, ptr %gep.a, align 1 + %ext.a = zext i8 %load.a to i32 + %gep.b = getelementptr i8, ptr %b, i64 %iv + %load.b = load i8, ptr %gep.b, align 1 + %ext.b = zext i8 %load.b to i32 + %mul = mul i32 %ext.b, %ext.a + %sub = sub i32 0, %mul + %add = add i32 %accum, %sub + %iv.next = add i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %for.exit, label %for.body + +for.exit: ; preds = %for.body + ret i32 %add +} From ec2e9aa7a494a1c18a56e9d87b756d0594595140 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Tue, 19 Aug 2025 23:33:55 -0700 Subject: [PATCH 2/4] [LV][VPlan] Reduce register usage of VPEVLBasedIVPHIRecipe. VPEVLBasedIVPHIRecipe will be lowered to a scalar-phi VPInstruction and will generate a scalar phi. This recipe will only use a scalar register, just like other phi recipes. This patch fixes the register usage for VPEVLBasedIVPHIRecipe from vector to scalar, which is closer to the generated vector IR. https://godbolt.org/z/6Mzd6W6ha shows that no register spills occur when choosing VF = vscale x 16. 
--- llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 2 +- .../LoopVectorize/RISCV/maxbandwidth-regpressure.ll | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index b39231f106300..b46d99052a1dd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -555,7 +555,7 @@ SmallVector llvm::calculateRegisterUsageForPlan( if (VFs[J].isScalar() || isa(R) || + VPEVLBasedIVPHIRecipe, VPScalarIVStepsRecipe>(R) || (isa(R) && all_of(cast(R)->users(), [&](VPUser *U) { diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll b/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll index af4d76d853bfd..71b26aa77ce88 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll @@ -4,12 +4,11 @@ define i32 @dotp(ptr %a, ptr %b) { ; CHECK-REGS-VP: LV(REG): VF = vscale x 16 ; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item -; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers -; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 40 registers +; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 24 registers ; CHECK-REGS-VP-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers -; CHECK-REGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers -; CHECK-REGS-VP: LV: Selecting VF: vscale x 8. +; CHECK-REGS-VP: LV: Selecting VF: vscale x 16. 
; ; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 8 because it uses too many registers ; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers From c6eb21b7f8cfb9fa060a467d2a03eee02037f2c0 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Wed, 20 Aug 2025 00:59:16 -0700 Subject: [PATCH 3/4] address comments. --- ...{maxbandwidth-regpressure.ll => reg-usage-maxbandwidth.ll} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename llvm/test/Transforms/LoopVectorize/RISCV/{maxbandwidth-regpressure.ll => reg-usage-maxbandwidth.ll} (87%) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll similarity index 87% rename from llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll rename to llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll index 71b26aa77ce88..8f9f543149285 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize,vplan -disable-output -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-REGS-VP -; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize -disable-output -force-target-num-vector-regs=1 -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NOREGS-VP +; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize,vplan -disable-output -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-REGS-VP +; RUN: opt -passes=loop-vectorize -mtriple riscv64 
-mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize -disable-output -force-target-num-vector-regs=1 -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NOREGS-VP define i32 @dotp(ptr %a, ptr %b) { ; CHECK-REGS-VP: LV(REG): VF = vscale x 16 ; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item From 0a10d125c0f7138368fa4b641e2458a7f7aa03c3 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Wed, 20 Aug 2025 02:16:39 -0700 Subject: [PATCH 4/4] !fixup, add newline and remove unneeded runs. --- .../LoopVectorize/RISCV/reg-usage-maxbandwidth.ll | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll index 8f9f543149285..6bb0d64314d3e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize,vplan -disable-output -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-REGS-VP -; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize -disable-output -force-target-num-vector-regs=1 -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NOREGS-VP + define i32 @dotp(ptr %a, ptr %b) { ; CHECK-REGS-VP: LV(REG): VF = vscale x 16 ; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item @@ -8,11 +8,7 @@ define i32 @dotp(ptr %a, ptr %b) { ; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 24 registers ; CHECK-REGS-VP-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers -; CHECK-REGS-VP: LV: Selecting VF: vscale x 16. 
-; -; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 8 because it uses too many registers -; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers -; CHECK-NOREGS-VP: LV: Selecting VF: vscale x 4. +; CHECK-REGS-VP: LV: Selecting VF: vscale x 16. entry: br label %for.body