Skip to content

Conversation

@artagnon
Copy link
Contributor

@artagnon artagnon commented Oct 1, 2024

As AliasAnalysis now has support for scalable sizes, lift the limitation on analyzing scalable sizes in DeadStoreElimination.

@llvmbot
Copy link
Member

llvmbot commented Oct 1, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes

As AliasAnalysis now has support for scalable sizes, lift the limitation on analyzing scalable sizes in DeadStoreElimination.

-- 8< --
Based on #110669.


Full diff: https://github.com/llvm/llvm-project/pull/110670.diff

3 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp (+24-14)
  • (modified) llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll (+53-3)
  • (modified) llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll (+46)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a304f7b056f5f7..40251db9731a62 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1016,15 +1016,25 @@ struct DSEState {
       return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
     }
 
-    const TypeSize KillingSize = KillingLocSize.getValue();
-    const TypeSize DeadSize = DeadLoc.Size.getValue();
-    // Bail on doing Size comparison which depends on AA for now
-    // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
-    const bool AnyScalable =
-        DeadSize.isScalable() || KillingLocSize.isScalable();
-
-    if (AnyScalable)
-      return OW_Unknown;
+    APInt KillingSize = APInt(64, KillingLocSize.getValue().getKnownMinValue());
+    APInt DeadSize = APInt(64, DeadLoc.Size.getValue().getKnownMinValue());
+
+    // We can compare lower-range(KillingSize) with upper-range(DeadSize), using
+    // VScale.
+    ConstantRange CR = getVScaleRange(&F, 64);
+    if (KillingLocSize.isScalable()) {
+      bool Overflow;
+      APInt LowerRange = CR.getUnsignedMin().umul_ov(KillingSize, Overflow);
+      if (!Overflow)
+        KillingSize = LowerRange;
+    }
+    if (DeadLoc.Size.isScalable()) {
+      bool Overflow;
+      APInt UpperRange = CR.getUnsignedMax().umul_ov(DeadSize, Overflow);
+      if (!Overflow)
+        DeadSize = UpperRange;
+    }
+
     // Query the alias information
     AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
 
@@ -1032,14 +1042,14 @@ struct DSEState {
     // the killing store was larger than the dead store.
     if (AAR == AliasResult::MustAlias) {
       // Make sure that the KillingSize size is >= the DeadSize size.
-      if (KillingSize >= DeadSize)
+      if (KillingSize.uge(DeadSize))
         return OW_Complete;
     }
 
     // If we hit a partial alias we may have a full overwrite
     if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
       int32_t Off = AAR.getOffset();
-      if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
+      if (Off >= 0 && KillingSize.uge(uint64_t(Off) + DeadSize))
         return OW_Complete;
     }
 
@@ -1089,16 +1099,16 @@ struct DSEState {
     if (DeadOff >= KillingOff) {
       // If the dead access ends "not after" the killing access then the
       // dead one is completely overwritten by the killing one.
-      if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
+      if (KillingSize.uge(uint64_t(DeadOff - KillingOff) + DeadSize))
         return OW_Complete;
       // If start of the dead access is "before" end of the killing access
       // then accesses overlap.
-      else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
+      if (KillingSize.ugt(uint64_t(DeadOff - KillingOff)))
         return OW_MaybePartial;
     }
     // If start of the killing access is "before" end of the dead access then
     // accesses overlap.
-    else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
+    else if (DeadSize.ugt(uint64_t(KillingOff - DeadOff))) {
       return OW_MaybePartial;
     }
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
index fbab350008f4ee..c2f7adde7a22ee 100644
--- a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
@@ -47,6 +47,29 @@ bb:
   ret void
 }
 
+define void @ScalableVectorTestFullyOverlapping(ptr %arg, i32 %i) {
+; CHECK-LABEL: @ScalableVectorTestFullyOverlapping(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I7:%.*]] = add nuw nsw i32 [[I:%.*]], 1
+; CHECK-NEXT:    [[I8:%.*]] = zext i32 [[I7]] to i64
+; CHECK-NEXT:    [[I9:%.*]] = getelementptr inbounds float, ptr [[ARG:%.*]], i64 [[I8]]
+; CHECK-NEXT:    store float 0.000000e+00, ptr [[I9]], align 4
+; CHECK-NEXT:    [[I2:%.*]] = zext i32 [[I]] to i64
+; CHECK-NEXT:    [[I3:%.*]] = getelementptr inbounds float, ptr [[ARG]], i64 [[I2]]
+; CHECK-NEXT:    store <vscale x 2 x float> zeroinitializer, ptr [[I3]], align 16
+; CHECK-NEXT:    ret void
+;
+bb:
+  %i7 = add nuw nsw i32 %i, 1
+  %i8 = zext i32 %i7 to i64
+  %i9 = getelementptr inbounds float, ptr %arg, i64 %i8
+  store float 0.0, ptr %i9, align 4
+  %i2 = zext i32 %i to i64
+  %i3 = getelementptr inbounds float, ptr %arg, i64 %i2
+  store <vscale x 2 x float> zeroinitializer, ptr %i3, align 16
+  ret void
+}
+
 define void @ArrayTestPartiallyOverlapping(i64 %0) {
 ;
 ; The DSE pass will not kill the store because the overlap is partial
@@ -55,9 +78,9 @@ define void @ArrayTestPartiallyOverlapping(i64 %0) {
 ; CHECK-LABEL: @ArrayTestPartiallyOverlapping(
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP0:%.*]], 10
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [0 x i8], ptr @BUFFER, i64 0, i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP0]], 15
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [0 x i8], ptr @BUFFER, i64 0, i64 [[TMP5]]
-; CHECK-NEXT:    store i32 1, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP0]], 15
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [0 x i8], ptr @BUFFER, i64 0, i64 [[TMP4]]
+; CHECK-NEXT:    store i32 1, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    store i64 0, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -97,3 +120,30 @@ bb:
   ret void
 }
 
+define void @ScalableVectorTestPartiallyOverlapping(ptr %arg, i32 %i) {
+;
+; The DSE pass will not kill the store because the overlap is partial
+; and won't fully clobber the original store.
+;
+; CHECK-LABEL: @ScalableVectorTestPartiallyOverlapping(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I2:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[I3:%.*]] = getelementptr inbounds float, ptr [[ARG:%.*]], i64 [[I2]]
+; CHECK-NEXT:    store <vscale x 2 x float> zeroinitializer, ptr [[I3]], align 16
+; CHECK-NEXT:    [[I5:%.*]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[I6:%.*]] = zext i32 [[I5]] to i64
+; CHECK-NEXT:    [[I7:%.*]] = getelementptr inbounds float, ptr [[ARG]], i64 [[I6]]
+; CHECK-NEXT:    store <vscale x 2 x float> zeroinitializer, ptr [[I7]], align 16
+; CHECK-NEXT:    ret void
+;
+bb:
+  %i2 = zext i32 %i to i64
+  %i3 = getelementptr inbounds float, ptr %arg, i64 %i2
+  store <vscale x 2 x float> zeroinitializer, ptr %i3, align 16
+  %i5 = add nuw nsw i32 %i, 1
+  %i6 = zext i32 %i5 to i64
+  %i7 = getelementptr inbounds float, ptr %arg, i64 %i6
+  store <vscale x 2 x float> zeroinitializer, ptr %i7, align 16
+  ret void
+}
+
diff --git a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
index c9a0943de8cd98..e89d6dea164750 100644
--- a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
@@ -655,3 +655,49 @@ exit:
   call void @use(ptr %p) argmemonly
   ret void
 }
+
+define void @scalable_scalable_redundant_store(ptr %ptr) {
+; CHECK-LABEL: @scalable_scalable_redundant_store(
+; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[PTR:%.*]], align 32
+; CHECK-NEXT:    ret void
+;
+  %gep = getelementptr i64, ptr %ptr, i64 2
+  store <vscale x 2 x i64> zeroinitializer, ptr %gep
+  store <vscale x 4 x i64> zeroinitializer, ptr %ptr
+  ret void
+}
+
+define void @scalable_fixed_redundant_store(ptr %ptr) {
+; CHECK-LABEL: @scalable_fixed_redundant_store(
+; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[PTR:%.*]], align 32
+; CHECK-NEXT:    ret void
+;
+  %gep = getelementptr i64, ptr %ptr, i64 2
+  store <2 x i64> zeroinitializer, ptr %gep
+  store <vscale x 4 x i64> zeroinitializer, ptr %ptr
+  ret void
+}
+
+define void @fixed_scalable_redundant_store(ptr %ptr) {
+; CHECK-LABEL: @fixed_scalable_redundant_store(
+; CHECK-NEXT:    store <128 x i64> zeroinitializer, ptr [[PTR:%.*]], align 1024
+; CHECK-NEXT:    ret void
+;
+  %gep = getelementptr i64, ptr %ptr, i64 2
+  store <vscale x 2 x i64> zeroinitializer, ptr %gep
+  store <128 x i64> zeroinitializer, ptr %ptr
+  ret void
+}
+
+define void @scalable_scalable_neg(ptr %ptr) {
+; CHECK-LABEL: @scalable_scalable_neg(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 8
+; CHECK-NEXT:    store <vscale x 4 x i64> zeroinitializer, ptr [[GEP]], align 32
+; CHECK-NEXT:    store <vscale x 2 x i64> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT:    ret void
+;
+  %gep = getelementptr i64, ptr %ptr, i64 8
+  store <vscale x 4 x i64> zeroinitializer, ptr %gep
+  store <vscale x 2 x i64> zeroinitializer, ptr %ptr
+  ret void
+}

@artagnon artagnon requested a review from fhahn October 10, 2024 10:24
@artagnon
Copy link
Contributor Author

Gentle ping. Can we verify whether or not this is correct?

@artagnon
Copy link
Contributor Author

Gentle ping.

@artagnon
Copy link
Contributor Author

Gentle ping.

1 similar comment
@artagnon
Copy link
Contributor Author

Gentle ping.

As AliasAnalysis now has support for scalable sizes, lift the limitation
on analyzing scalable sizes in DeadStoreElimination.
@artagnon
Copy link
Contributor Author

artagnon commented Dec 9, 2024

Gentle ping.

2 similar comments
@artagnon
Copy link
Contributor Author

Gentle ping.

@artagnon
Copy link
Contributor Author

artagnon commented Jan 9, 2025

Gentle ping.

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there reason to believe this is useful in practice?

Can we just do the trivial generalization here (the MustAlias case) without introducing more complex reasoning?

@artagnon
Copy link
Contributor Author

Is there reason to believe this is useful in practice?

Can we just do the trivial generalization here (the MustAlias case) without introducing more complex reasoning?

Will llvm-opt-benchmark tell us? I've submitted a job, and it should appear here shortly.

@nikic
Copy link
Contributor

nikic commented Jan 14, 2025

llvm-opt-benchmark uses an x86_64 target, which doesn't have scalable vectors, so no.

@artagnon
Copy link
Contributor Author

llvm-opt-benchmark uses an x86_64 target, which doesn't have scalable vectors, so no.

Okay, I have a proposal: in the morning, I will cut down the size of this patch by restricting it to MustAlias. Then, I could run llvm-test-suite offline with the remainder of the patch (i.e. the PartialAlias) on the RISC-V target and check for binary changes: if there are any, I will post the diff with a follow up patch.

@fhahn
Copy link
Contributor

fhahn commented Jan 14, 2025

IIUC scalable vectors will in most cases only be introduced by LoopVectorize, so you might have to do LTO to have a chance to trigger this in practice?

@artagnon
Copy link
Contributor Author

IIUC scalable vectors will in most cases only be introduced by LoopVectorize, so you might have to do LTO to have a chance to trigger this in practice?

I see: doesn't DSE run after LV in the normal compiler flow at all?

@artagnon artagnon closed this Apr 3, 2025
@artagnon artagnon deleted the dse-aa-scalable branch April 3, 2025 10:30
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants