[VPlan] Handle WidenGEP in narrowToSingleScalars #166740
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes

This patch is similar to b0b4616 ([VPlan] Handle single-scalar conds in VPWidenSelectRecipe) in spirit.

Full diff: https://github.com/llvm/llvm-project/pull/166740.diff

4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index bbb03fbdff7a2..403bd8d7d814e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1786,12 +1786,6 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
return getOperand(I + 1)->isDefinedOutsideLoopRegions();
}
- bool areAllOperandsInvariant() const {
- return all_of(operands(), [](VPValue *Op) {
- return Op->isDefinedOutsideLoopRegions();
- });
- }
-
public:
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands)
: VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP),
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 80cd112dbcd8a..dc0b59c48d3a6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2507,8 +2507,8 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
// is vector-typed. Thus, to keep the representation compact, we only use
// vector-typed operands for loop-varying values.
- if (areAllOperandsInvariant()) {
- // If we are vectorizing, but the GEP has only loop-invariant operands,
+ if (all_of(operands(), vputils::isSingleScalar)) {
+ // If we are vectorizing, but the GEP has only single-scalar operands,
// the GEP we build (by only using vector-typed operands for
// loop-varying values) would be a scalar pointer. Thus, to ensure we
// produce a vector of pointers, we need to either arbitrarily pick an
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
index 212a5c99676f4..14d2c123403f2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
@@ -63,7 +63,7 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) {
; CHECK-NEXT: store i32 [[STORE]], ptr [[NBRBOXES]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i32 [[IV]], [[IBOX]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -114,7 +114,7 @@ define void @predicated_strided_store(ptr %start) {
; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]]
; RVA23-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; RVA23-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; RVA23: middle.block:
; RVA23-NEXT: br label [[LOOP:%.*]]
; RVA23: exit:
@@ -141,7 +141,7 @@ define void @predicated_strided_store(ptr %start) {
; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]]
; RVA23ZVL1024B-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; RVA23ZVL1024B-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; RVA23ZVL1024B: middle.block:
; RVA23ZVL1024B-NEXT: br label [[LOOP:%.*]]
; RVA23ZVL1024B: exit:
@@ -185,16 +185,16 @@ define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr no
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P1:%.*]], <vscale x 2 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT1]], <vscale x 2 x ptr> align 8 [[TMP5]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[P2:%.*]], align 4
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P3:%.*]], <vscale x 2 x i64> [[BROADCAST_SPLAT2]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P3:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <vscale x 2 x ptr> [[DOTSPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> align 1 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
diff --git a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll
index d08ca8c99e8ba..c18cbde8340fa 100644
--- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll
+++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll
@@ -55,11 +55,11 @@ define void @wide_gep_index_invariant(ptr noalias %dst, ptr noalias %src, i64 %n
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC]], align 8
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, <4 x ptr> [[BROADCAST_SPLAT]], i64 [[N]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[N]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP2]], align 8
+; CHECK-NEXT: store <4 x ptr> [[DOTSPLAT]], ptr [[TMP2]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
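The test updates above show the effect at the IR level: when every GEP operand is a single scalar, the address is now computed once as a scalar GEP and the resulting pointer is broadcast, instead of broadcasting the operands and computing one address per lane. The following is a minimal, standalone sketch of the two shapes in plain C++; the function names, the fixed VF, and the float/pointer types are invented for illustration and are not LLVM code.

#include <array>
#include <cstddef>

constexpr std::size_t VF = 4; // assumed vectorization factor for this sketch

// Old shape: splat the loop-invariant operands, then compute one address per
// lane (a "wide" GEP over broadcast operands).
std::array<float *, VF> widenedGEP(float *Base, std::size_t InvariantIdx) {
  std::array<float *, VF> Lanes;
  for (std::size_t L = 0; L < VF; ++L)
    Lanes[L] = Base + InvariantIdx; // every lane repeats the same arithmetic
  return Lanes;
}

// New shape: compute the address once as a scalar GEP, then broadcast the
// single resulting pointer to all lanes.
std::array<float *, VF> singleScalarGEP(float *Base, std::size_t InvariantIdx) {
  float *Addr = Base + InvariantIdx; // computed once
  std::array<float *, VF> Lanes;
  Lanes.fill(Addr);                  // splat the pointer
  return Lanes;
}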
lukel97
left a comment
LGTM
fhahn
left a comment
This is different to VPWidenSelect, I think, which even with a scalar condition produces a vector result for vector operands?
If all operands are single scalar, we should be able to handle this in narrowToSingleScalar and remove the special handling in the recipe altogether?
I agree it would be nice to handle it there, but I don't think we have a recipe to represent an arbitrary scalar GEP yet; I think we only have VPInstruction::PtrAdd. Edit: never mind, maybe we can use a replicate recipe?
This is currently not possible, as the users check in narrowToSingleScalars is too weak.
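For illustration, here is a rough, self-contained sketch of the transform-level idea discussed above, using toy data structures rather than the real VPlan API (ToyValue, ToyRecipe, and narrowGEPsToSingleScalars are invented for this sketch): a widened GEP whose operands are all single scalars, and whose users can accept a single scalar, is rewritten into a single-scalar (replicate-style) form, so the recipe's execute no longer needs a special case.

#include <algorithm>
#include <vector>

// Toy stand-ins for VPlan values and recipes; names and fields are invented.
struct ToyValue {
  bool SingleScalar = false; // roughly: defined outside the loop / uniform
};

struct ToyRecipe {
  std::vector<ToyValue *> Operands;
  bool IsWidenGEP = false;             // stands in for isa<VPWidenGEPRecipe>
  bool UsersAcceptScalar = false;      // the "users check" mentioned above
  bool NarrowedToSingleScalar = false; // in VPlan: replaced by a replicate recipe
};

// Narrow every widened GEP whose operands are all single scalars and whose
// users only need a single scalar, mirroring the narrowToSingleScalars idea.
void narrowGEPsToSingleScalars(std::vector<ToyRecipe> &Recipes) {
  for (ToyRecipe &R : Recipes) {
    if (!R.IsWidenGEP || !R.UsersAcceptScalar)
      continue;
    bool AllSingle =
        std::all_of(R.Operands.begin(), R.Operands.end(),
                    [](const ToyValue *Op) { return Op->SingleScalar; });
    if (AllSingle)
      R.NarrowedToSingleScalar = true; // real code would build a replicate recipe
  }
}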
Force-pushed from 89ca371 to 4c8e056
To prove this, I've rebased onto #166559.
Force-pushed from 4c8e056 to 2fcb902
Thanks, basing this on narrowToSingleScalar seems like a promising way forward, nicely simplifying ::execute.
Force-pushed from b512ce4 to c9d6f38
Thanks for the guidance; the dependent patch has now landed.
lukel97
left a comment
New version LGTM
Force-pushed from c9d6f38 to 594957d
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed from 594957d to 7785aeb
fhahn
left a comment
LGTM, thanks
An assertion is failing when trying to compile part of llvm-test-suite after this change. This is the one with the smallest commit list: https://lab.llvm.org/buildbot/#/builders/4/builds/10404

So far it has failed on SVE and SVE2, vector-length specific and agnostic. SIMD-only AArch64 and Arm 32-bit builds are still in progress.

Our build logs have jumbled output due to parallel jobs, but here's the first bit of the failure:

I'll get you a reproducer from one of the bots.
Thanks, I kind of assumed that there would be failures from this: we need test cases for LV.
Reproducer:
Thanks, merging a revert for now: #167509.
This allows us to strip a special case in VPWidenGEPRecipe::execute.