Skip to content

Conversation

@vporpo
Copy link
Contributor

@vporpo vporpo commented Nov 7, 2024

When scalars get replaced by vectors the original scalars may become dead. In that case erase them.

@llvmbot
Copy link
Member

llvmbot commented Nov 7, 2024

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: vporpo (vporpo)

Changes

When scalars get replaced by vectors the original scalars may become dead. In that case erase them.


Full diff: https://github.com/llvm/llvm-project/pull/115267.diff

3 Files Affected:

  • (modified) llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h (+2)
  • (modified) llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp (+21-1)
  • (modified) llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll (+22-34)
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
index 18e34bcec81b06..af27477f2ce303 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
@@ -25,10 +25,12 @@ namespace llvm::sandboxir {
 class BottomUpVec final : public FunctionPass {
   bool Change = false;
   std::unique_ptr<LegalityAnalysis> Legality;
+  DenseSet<Instruction *> DeadInstrCandidates;
 
   /// Creates and returns a vector instruction that replaces the instructions in
   /// \p Bndl. \p Operands are the already vectorized operands.
   Value *createVectorInstr(ArrayRef<Value *> Bndl, ArrayRef<Value *> Operands);
+  void tryEraseDeadInstrs();
   Value *vectorizeRec(ArrayRef<Value *> Bndl);
   bool tryVectorize(ArrayRef<Value *> Seeds);
 
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index 0a930d30aeab58..ac2565816ff7d1 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -153,6 +153,20 @@ Value *BottomUpVec::createVectorInstr(ArrayRef<Value *> Bndl,
   // TODO: Propagate debug info.
 }
 
+void BottomUpVec::tryEraseDeadInstrs() {
+  bool Change = true;
+  while (Change) {
+    Change = false;
+    for (Instruction *I : make_early_inc_range(DeadInstrCandidates)) {
+      if (I->hasNUses(0)) {
+        Change = true;
+        I->eraseFromParent();
+        DeadInstrCandidates.erase(I);
+      }
+    }
+  }
+}
+
 Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl) {
   Value *NewVec = nullptr;
   const auto &LegalityRes = Legality->canVectorize(Bndl);
@@ -182,7 +196,11 @@ Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl) {
     }
     NewVec = createVectorInstr(Bndl, VecOperands);
 
-    // TODO: Collect potentially dead instructions.
+    // Collect the original scalar instructions as they may be dead.
+    if (NewVec != nullptr) {
+      for (Value *V : Bndl)
+        DeadInstrCandidates.insert(cast<Instruction>(V));
+    }
     break;
   }
   case LegalityResultID::Pack: {
@@ -194,7 +212,9 @@ Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl) {
 }
 
 bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) {
+  DeadInstrCandidates.clear();
   vectorizeRec(Bndl);
+  tryEraseDeadInstrs();
   return Change;
 }
 
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index e56dbd75963f7a..4f8d821c7ed581 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -6,11 +6,7 @@ define void @store_load(ptr %ptr) {
 ; CHECK-SAME: ptr [[PTR:%.*]]) {
 ; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
 ; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
-; CHECK-NEXT:    [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
 ; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
-; CHECK-NEXT:    store float [[LD0]], ptr [[PTR0]], align 4
-; CHECK-NEXT:    store float [[LD1]], ptr [[PTR1]], align 4
 ; CHECK-NEXT:    store <2 x float> [[VECL]], ptr [[PTR0]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -31,14 +27,8 @@ define void @store_fpext_load(ptr %ptr) {
 ; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
 ; CHECK-NEXT:    [[PTRD0:%.*]] = getelementptr double, ptr [[PTR]], i32 0
 ; CHECK-NEXT:    [[PTRD1:%.*]] = getelementptr double, ptr [[PTR]], i32 1
-; CHECK-NEXT:    [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
 ; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[FPEXT0:%.*]] = fpext float [[LD0]] to double
-; CHECK-NEXT:    [[FPEXT1:%.*]] = fpext float [[LD1]] to double
 ; CHECK-NEXT:    [[VCAST:%.*]] = fpext <2 x float> [[VECL]] to <2 x double>
-; CHECK-NEXT:    store double [[FPEXT0]], ptr [[PTRD0]], align 8
-; CHECK-NEXT:    store double [[FPEXT1]], ptr [[PTRD1]], align 8
 ; CHECK-NEXT:    store <2 x double> [[VCAST]], ptr [[PTRD0]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -62,20 +52,10 @@ define void @store_fcmp_zext_load(ptr %ptr) {
 ; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
 ; CHECK-NEXT:    [[PTRB0:%.*]] = getelementptr i32, ptr [[PTR]], i32 0
 ; CHECK-NEXT:    [[PTRB1:%.*]] = getelementptr i32, ptr [[PTR]], i32 1
-; CHECK-NEXT:    [[LDB0:%.*]] = load float, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[LDB1:%.*]] = load float, ptr [[PTR1]], align 4
 ; CHECK-NEXT:    [[VECL1:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[LDA0:%.*]] = load float, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[LDA1:%.*]] = load float, ptr [[PTR1]], align 4
 ; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[FCMP0:%.*]] = fcmp ogt float [[LDA0]], [[LDB0]]
-; CHECK-NEXT:    [[FCMP1:%.*]] = fcmp ogt float [[LDA1]], [[LDB1]]
 ; CHECK-NEXT:    [[VCMP:%.*]] = fcmp ogt <2 x float> [[VECL]], [[VECL1]]
-; CHECK-NEXT:    [[ZEXT0:%.*]] = zext i1 [[FCMP0]] to i32
-; CHECK-NEXT:    [[ZEXT1:%.*]] = zext i1 [[FCMP1]] to i32
 ; CHECK-NEXT:    [[VCAST:%.*]] = zext <2 x i1> [[VCMP]] to <2 x i32>
-; CHECK-NEXT:    store i32 [[ZEXT0]], ptr [[PTRB0]], align 4
-; CHECK-NEXT:    store i32 [[ZEXT1]], ptr [[PTRB1]], align 4
 ; CHECK-NEXT:    store <2 x i32> [[VCAST]], ptr [[PTRB0]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -101,17 +81,9 @@ define void @store_fadd_load(ptr %ptr) {
 ; CHECK-SAME: ptr [[PTR:%.*]]) {
 ; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
 ; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
-; CHECK-NEXT:    [[LDA0:%.*]] = load float, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[LDA1:%.*]] = load float, ptr [[PTR1]], align 4
 ; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[LDB0:%.*]] = load float, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[LDB1:%.*]] = load float, ptr [[PTR1]], align 4
 ; CHECK-NEXT:    [[VECL1:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[FADD0:%.*]] = fadd float [[LDA0]], [[LDB0]]
-; CHECK-NEXT:    [[FADD1:%.*]] = fadd float [[LDA1]], [[LDB1]]
 ; CHECK-NEXT:    [[VEC:%.*]] = fadd <2 x float> [[VECL]], [[VECL1]]
-; CHECK-NEXT:    store float [[FADD0]], ptr [[PTR0]], align 4
-; CHECK-NEXT:    store float [[FADD1]], ptr [[PTR1]], align 4
 ; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -133,14 +105,8 @@ define void @store_fneg_load(ptr %ptr) {
 ; CHECK-SAME: ptr [[PTR:%.*]]) {
 ; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
 ; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
-; CHECK-NEXT:    [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
 ; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[FNEG0:%.*]] = fneg float [[LD0]]
-; CHECK-NEXT:    [[FNEG1:%.*]] = fneg float [[LD1]]
 ; CHECK-NEXT:    [[VEC:%.*]] = fneg <2 x float> [[VECL]]
-; CHECK-NEXT:    store float [[FNEG0]], ptr [[PTR0]], align 4
-; CHECK-NEXT:    store float [[FNEG1]], ptr [[PTR1]], align 4
 ; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -155,3 +121,25 @@ define void @store_fneg_load(ptr %ptr) {
   ret void
 }
 
+define float @sclars_with_external_uses_not_dead(ptr %ptr) {
+; CHECK-LABEL: define float @sclars_with_external_uses_not_dead(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; CHECK-NEXT:    [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
+; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
+; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
+; CHECK-NEXT:    store <2 x float> [[VECL]], ptr [[PTR0]], align 4
+; CHECK-NEXT:    [[USER:%.*]] = fneg float [[LD1]]
+; CHECK-NEXT:    ret float [[LD0]]
+;
+  %ptr0 = getelementptr float, ptr %ptr, i32 0
+  %ptr1 = getelementptr float, ptr %ptr, i32 1
+  %ld0 = load float, ptr %ptr0
+  %ld1 = load float, ptr %ptr1
+  store float %ld0, ptr %ptr0
+  store float %ld1, ptr %ptr1
+  %user = fneg float %ld1
+  ret float %ld0
+}
+

while (Change) {
Change = false;
for (Instruction *I : make_early_inc_range(DeadInstrCandidates)) {
if (I->hasNUses(0)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you traverse the instructions in the reverse order, you only need to do this once I believe. You can fix the order when you populate the set.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed this to a single loop. I am sorting DeadInstrCandidates right before walking through it.

When scalars get replaced by vectors the original scalars may
become dead. In that case erase them.
@vporpo vporpo merged commit 7dffc96 into llvm:main Nov 8, 2024
6 of 8 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 8, 2024

LLVM Buildbot has detected a new failure on builder openmp-s390x-linux running on systemz-1 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/4406

Here is the relevant piece of the build log for the reference
Step 6 (test-openmp) failure: 1200 seconds without output running [b'ninja', b'-j 4', b'check-openmp'], attempting to kill
...
PASS: ompd-test :: openmp_examples/example_2.c (439 of 449)
PASS: ompd-test :: openmp_examples/example_4.c (440 of 449)
PASS: ompd-test :: openmp_examples/example_task.c (441 of 449)
PASS: ompd-test :: openmp_examples/example_5.c (442 of 449)
UNSUPPORTED: ompd-test :: openmp_examples/ompd_bt.c (443 of 449)
PASS: ompd-test :: openmp_examples/fibonacci.c (444 of 449)
UNSUPPORTED: ompd-test :: openmp_examples/ompd_parallel.c (445 of 449)
PASS: ompd-test :: openmp_examples/parallel.c (446 of 449)
PASS: ompd-test :: openmp_examples/nested.c (447 of 449)
PASS: ompd-test :: openmp_examples/ompd_icvs.c (448 of 449)
command timed out: 1200 seconds without output running [b'ninja', b'-j 4', b'check-openmp'], attempting to kill
process killed by signal 9
program finished with exit code -1
elapsedTime=1329.586228

Groverkss pushed a commit to iree-org/llvm-project that referenced this pull request Nov 15, 2024
When scalars get replaced by vectors the original scalars may become
dead. In that case erase them.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants