-
Notifications
You must be signed in to change notification settings - Fork 15.2k
DSE: lift limitation on sizes being non-scalable #110670
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon). Changes: As AliasAnalysis now has support for scalable sizes, lift the limitation on analyzing scalable sizes in DeadStoreElimination. -- 8< -- Full diff: https://github.com/llvm/llvm-project/pull/110670.diff — 3 files affected:
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a304f7b056f5f7..40251db9731a62 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1016,15 +1016,25 @@ struct DSEState {
return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
}
- const TypeSize KillingSize = KillingLocSize.getValue();
- const TypeSize DeadSize = DeadLoc.Size.getValue();
- // Bail on doing Size comparison which depends on AA for now
- // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
- const bool AnyScalable =
- DeadSize.isScalable() || KillingLocSize.isScalable();
-
- if (AnyScalable)
- return OW_Unknown;
+ APInt KillingSize = APInt(64, KillingLocSize.getValue().getKnownMinValue());
+ APInt DeadSize = APInt(64, DeadLoc.Size.getValue().getKnownMinValue());
+
+ // We can compare lower-range(KillingSize) with upper-range(DeadSize), using
+ // VScale.
+ ConstantRange CR = getVScaleRange(&F, 64);
+ if (KillingLocSize.isScalable()) {
+ bool Overflow;
+ APInt LowerRange = CR.getUnsignedMin().umul_ov(KillingSize, Overflow);
+ if (!Overflow)
+ KillingSize = LowerRange;
+ }
+ if (DeadLoc.Size.isScalable()) {
+ bool Overflow;
+ APInt UpperRange = CR.getUnsignedMax().umul_ov(DeadSize, Overflow);
+ if (!Overflow)
+ DeadSize = UpperRange;
+ }
+
// Query the alias information
AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
@@ -1032,14 +1042,14 @@ struct DSEState {
// the killing store was larger than the dead store.
if (AAR == AliasResult::MustAlias) {
// Make sure that the KillingSize size is >= the DeadSize size.
- if (KillingSize >= DeadSize)
+ if (KillingSize.uge(DeadSize))
return OW_Complete;
}
// If we hit a partial alias we may have a full overwrite
if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
int32_t Off = AAR.getOffset();
- if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
+ if (Off >= 0 && KillingSize.uge(uint64_t(Off) + DeadSize))
return OW_Complete;
}
@@ -1089,16 +1099,16 @@ struct DSEState {
if (DeadOff >= KillingOff) {
// If the dead access ends "not after" the killing access then the
// dead one is completely overwritten by the killing one.
- if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
+ if (KillingSize.uge(uint64_t(DeadOff - KillingOff) + DeadSize))
return OW_Complete;
// If start of the dead access is "before" end of the killing access
// then accesses overlap.
- else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
+ if (KillingSize.ugt(uint64_t(DeadOff - KillingOff)))
return OW_MaybePartial;
}
// If start of the killing access is "before" end of the dead access then
// accesses overlap.
- else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
+ else if (DeadSize.ugt(uint64_t(KillingOff - DeadOff))) {
return OW_MaybePartial;
}
diff --git a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
index fbab350008f4ee..c2f7adde7a22ee 100644
--- a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
@@ -47,6 +47,29 @@ bb:
ret void
}
+define void @ScalableVectorTestFullyOverlapping(ptr %arg, i32 %i) {
+; CHECK-LABEL: @ScalableVectorTestFullyOverlapping(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I7:%.*]] = add nuw nsw i32 [[I:%.*]], 1
+; CHECK-NEXT: [[I8:%.*]] = zext i32 [[I7]] to i64
+; CHECK-NEXT: [[I9:%.*]] = getelementptr inbounds float, ptr [[ARG:%.*]], i64 [[I8]]
+; CHECK-NEXT: store float 0.000000e+00, ptr [[I9]], align 4
+; CHECK-NEXT: [[I2:%.*]] = zext i32 [[I]] to i64
+; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds float, ptr [[ARG]], i64 [[I2]]
+; CHECK-NEXT: store <vscale x 2 x float> zeroinitializer, ptr [[I3]], align 16
+; CHECK-NEXT: ret void
+;
+bb:
+ %i7 = add nuw nsw i32 %i, 1
+ %i8 = zext i32 %i7 to i64
+ %i9 = getelementptr inbounds float, ptr %arg, i64 %i8
+ store float 0.0, ptr %i9, align 4
+ %i2 = zext i32 %i to i64
+ %i3 = getelementptr inbounds float, ptr %arg, i64 %i2
+ store <vscale x 2 x float> zeroinitializer, ptr %i3, align 16
+ ret void
+}
+
define void @ArrayTestPartiallyOverlapping(i64 %0) {
;
; The DSE pass will not kill the store because the overlap is partial
@@ -55,9 +78,9 @@ define void @ArrayTestPartiallyOverlapping(i64 %0) {
; CHECK-LABEL: @ArrayTestPartiallyOverlapping(
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0:%.*]], 10
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [0 x i8], ptr @BUFFER, i64 0, i64 [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], 15
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [0 x i8], ptr @BUFFER, i64 0, i64 [[TMP5]]
-; CHECK-NEXT: store i32 1, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], 15
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [0 x i8], ptr @BUFFER, i64 0, i64 [[TMP4]]
+; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4
; CHECK-NEXT: store i64 0, ptr [[TMP3]], align 4
; CHECK-NEXT: ret void
;
@@ -97,3 +120,30 @@ bb:
ret void
}
+define void @ScalableVectorTestPartiallyOverlapping(ptr %arg, i32 %i) {
+;
+; The DSE pass will not kill the store because the overlap is partial
+; and won't fully clobber the original store.
+;
+; CHECK-LABEL: @ScalableVectorTestPartiallyOverlapping(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I2:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds float, ptr [[ARG:%.*]], i64 [[I2]]
+; CHECK-NEXT: store <vscale x 2 x float> zeroinitializer, ptr [[I3]], align 16
+; CHECK-NEXT: [[I5:%.*]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[I6:%.*]] = zext i32 [[I5]] to i64
+; CHECK-NEXT: [[I7:%.*]] = getelementptr inbounds float, ptr [[ARG]], i64 [[I6]]
+; CHECK-NEXT: store <vscale x 2 x float> zeroinitializer, ptr [[I7]], align 16
+; CHECK-NEXT: ret void
+;
+bb:
+ %i2 = zext i32 %i to i64
+ %i3 = getelementptr inbounds float, ptr %arg, i64 %i2
+ store <vscale x 2 x float> zeroinitializer, ptr %i3, align 16
+ %i5 = add nuw nsw i32 %i, 1
+ %i6 = zext i32 %i5 to i64
+ %i7 = getelementptr inbounds float, ptr %arg, i64 %i6
+ store <vscale x 2 x float> zeroinitializer, ptr %i7, align 16
+ ret void
+}
+
diff --git a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
index c9a0943de8cd98..e89d6dea164750 100644
--- a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
@@ -655,3 +655,49 @@ exit:
call void @use(ptr %p) argmemonly
ret void
}
+
+define void @scalable_scalable_redundant_store(ptr %ptr) {
+; CHECK-LABEL: @scalable_scalable_redundant_store(
+; CHECK-NEXT: store <vscale x 4 x i64> zeroinitializer, ptr [[PTR:%.*]], align 32
+; CHECK-NEXT: ret void
+;
+ %gep = getelementptr i64, ptr %ptr, i64 2
+ store <vscale x 2 x i64> zeroinitializer, ptr %gep
+ store <vscale x 4 x i64> zeroinitializer, ptr %ptr
+ ret void
+}
+
+define void @scalable_fixed_redundant_store(ptr %ptr) {
+; CHECK-LABEL: @scalable_fixed_redundant_store(
+; CHECK-NEXT: store <vscale x 4 x i64> zeroinitializer, ptr [[PTR:%.*]], align 32
+; CHECK-NEXT: ret void
+;
+ %gep = getelementptr i64, ptr %ptr, i64 2
+ store <2 x i64> zeroinitializer, ptr %gep
+ store <vscale x 4 x i64> zeroinitializer, ptr %ptr
+ ret void
+}
+
+define void @fixed_scalable_redundant_store(ptr %ptr) {
+; CHECK-LABEL: @fixed_scalable_redundant_store(
+; CHECK-NEXT: store <128 x i64> zeroinitializer, ptr [[PTR:%.*]], align 1024
+; CHECK-NEXT: ret void
+;
+ %gep = getelementptr i64, ptr %ptr, i64 2
+ store <vscale x 2 x i64> zeroinitializer, ptr %gep
+ store <128 x i64> zeroinitializer, ptr %ptr
+ ret void
+}
+
+define void @scalable_scalable_neg(ptr %ptr) {
+; CHECK-LABEL: @scalable_scalable_neg(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i64 8
+; CHECK-NEXT: store <vscale x 4 x i64> zeroinitializer, ptr [[GEP]], align 32
+; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT: ret void
+;
+ %gep = getelementptr i64, ptr %ptr, i64 8
+ store <vscale x 4 x i64> zeroinitializer, ptr %gep
+ store <vscale x 2 x i64> zeroinitializer, ptr %ptr
+ ret void
+}
|
|
Gentle ping. Can we verify whether or not this is correct? |
llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
Outdated
Show resolved
Hide resolved
3e8b008 to
7d27d23
Compare
|
Gentle ping. |
|
Gentle ping. |
1 similar comment
|
Gentle ping. |
As AliasAnalysis now has support for scalable sizes, lift the limitation on analyzing scalable sizes in DeadStoreElimination.
7d27d23 to
7f0bf5f
Compare
|
Gentle ping. |
2 similar comments
|
Gentle ping. |
|
Gentle ping. |
nikic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there reason to believe this is useful in practice?
Can we just do the trivial generalization here (the MustAlias case) without introducing more complex reasoning?
Will llvm-opt-benchmark tell us? I've submitted a job, and it should appear here shortly. |
|
llvm-opt-benchmark uses an x86_64 target, which doesn't have scalable vectors, so no. |
Okay, I have a proposal: in the morning, I will cut down the size of this patch by restricting it to MustAlias. Then, I could run llvm-test-suite offline with the remainder of the patch (i.e. the PartialAlias case) on the RISC-V target and check for binary changes: if there are any, I will post the diff with a follow-up patch. |
|
IIUC scalable vectors will in most cases only be introduced by LoopVectorize, so you might have to do LTO to have a chance to trigger this in practice? |
I see: doesn't DSE run after LV in the normal compiler flow at all? |
As AliasAnalysis now has support for scalable sizes, lift the limitation on analyzing scalable sizes in DeadStoreElimination.