[InferAlignment] Propagate alignment between loads/stores of the same base pointer #145733
Changes from 11 commits
@@ -58,14 +58,56 @@ bool inferAlignment(Function &F, AssumptionCache &AC, DominatorTree &DT) {
  }

  // Compute alignment from known bits.
  auto InferFromKnownBits = [&](Instruction &I, Value *PtrOp) {
    KnownBits Known = computeKnownBits(PtrOp, DL, &AC, &I, &DT);
    unsigned TrailZ =
        std::min(Known.countMinTrailingZeros(), +Value::MaxAlignmentExponent);
    return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
  };
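For intuition about the known-bits path, here is a minimal sketch (invented function name, not taken from the patch's tests): when a pointer's low bits are provably zero, for example after llvm.ptrmask, computeKnownBits reports those trailing zeros and the access alignment can be raised to match.

declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

; The mask -16 clears the low four bits, so the masked pointer has at least
; four known trailing zero bits and the load can be raised from align 1 to
; align 16 via the known-bits path.
define float @knownbits_sketch(ptr %p) {
  %masked = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16)
  %x = load float, ptr %masked, align 1
  ret float %x
}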
  // Propagate alignment between loads and stores that originate from the
  // same base pointer.
  DenseMap<Value *, Align> BestBasePointerAligns;
  auto InferFromBasePointer = [&](Value *PtrOp, Align LoadStoreAlign) {
    APInt OffsetFromBase =
        APInt(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
    PtrOp = PtrOp->stripAndAccumulateConstantOffsets(DL, OffsetFromBase, true);
    // Derive the base pointer alignment from the load/store alignment
    // and the offset from the base pointer.
    Align BasePointerAlign =
        commonAlignment(LoadStoreAlign, OffsetFromBase.getLimitedValue());

    auto [It, Inserted] =
        BestBasePointerAligns.try_emplace(PtrOp, BasePointerAlign);
    if (!Inserted) {
      // If the stored base pointer alignment is better than the
      // base pointer alignment we derived, we may be able to use it
      // to improve the load/store alignment. If not, store the
      // improved base pointer alignment for future iterations.
      if (It->second > BasePointerAlign) {
        Align BetterLoadStoreAlign =
            commonAlignment(It->second, OffsetFromBase.getLimitedValue());
        return BetterLoadStoreAlign;
      }
      It->second = BasePointerAlign;
    }
    return LoadStoreAlign;
  };
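To make the base-pointer arithmetic concrete, here is a condensed, hypothetical variant of the prop_align test below (names invented for illustration): a 16-aligned access at offset 0 proves the base pointer is 16-aligned, so an access at offset 16 can be raised to commonAlignment(16, 16) = 16, while an access at offset 4 is capped at commonAlignment(16, 4) = 4.

define void @base_align_sketch(ptr %base) {
  ; align 16 at offset 0 records a base-pointer alignment of 16 in the map.
  %a = load float, ptr %base, align 16
  ; offset 4: commonAlignment(16, 4) = 4, so this load stays at align 4.
  %p4 = getelementptr inbounds i8, ptr %base, i64 4
  %b = load float, ptr %p4, align 4
  ; offset 16: commonAlignment(16, 16) = 16, so this load can be raised to align 16.
  %p16 = getelementptr inbounds i8, ptr %base, i64 16
  %c = load float, ptr %p16, align 4
  ret void
}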
  for (BasicBlock &BB : F) {
    // We need to reset the map for each block because alignment information
    // can't be propagated across blocks: control flow could depend on the
    // address at runtime, so an alignment assumption that holds in one block
    // may not hold in another. Some sort of dominator tree approach could be
    // better, but restricting propagation to a single basic block is correct
    // too.
    BestBasePointerAligns.clear();

Review thread on the comment above:

Reviewer: FYI, the problem here is worse than described. The code as written appears to be correct; I'm just pointing out a conceptual problem which may need reflecting in the comments. Consider this: […] Propagating the alignment forward is sound, but propagating it backwards (over the possibly throwing call) is not.

Author: Good call-out, thank you for pointing that out! This tracks with my understanding of why a backwards propagation worked in the LSV but doesn't work here: the LSV analyzes within the scope of what it calls a "pseudo basic block", which is defined as follows: […] Anyway, I can adjust the comment to call out your example. And just to confirm, the hypothetical dominator tree approach described in my comment would still be correct, right?

Author: I updated the comment. Let me know if it looks accurate.
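The reviewer's elided example presumably had roughly the following shape; this is a hypothetical reconstruction with invented names, not the reviewer's actual snippet.

declare void @may_throw()

define float @forward_only_sketch(ptr %p) {
  ; Only align 4 is known here. If @may_throw unwinds, the align 16 access
  ; below never executes, so raising this load to align 16 (backwards
  ; propagation over the possibly throwing call) would be unsound.
  %a = load float, ptr %p, align 4
  call void @may_throw()
  ; From this point on %p is known to be 16-aligned, so propagating the
  ; alignment forward to later accesses in the same block is fine.
  %b = load float, ptr %p, align 16
  %sum = fadd float %a, %b
  ret float %sum
}

Because the map is filled in instruction order, facts only ever flow forward within a block, which is why the code as written stays on the sound side of this distinction.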
    for (Instruction &I : BB) {
      Changed |= tryToImproveAlign(
          DL, &I, [&](Value *PtrOp, Align OldAlign, Align PrefAlign) {
-           KnownBits Known = computeKnownBits(PtrOp, DL, &AC, &I, &DT);
-           unsigned TrailZ = std::min(Known.countMinTrailingZeros(),
-                                      +Value::MaxAlignmentExponent);
-           return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+           return std::max(InferFromKnownBits(I, PtrOp),
+                           InferFromBasePointer(PtrOp, OldAlign));
          });
    }
  }
@@ -0,0 +1,134 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=infer-alignment -S | FileCheck %s

%struct.S1 = type { %struct.float3, %struct.float3, i32, i32 }
%struct.float3 = type { float, float, float }

; ------------------------------------------------------------------------------
; Test that we can propagate the align 16 to the load and store that are set to align 4
; ------------------------------------------------------------------------------

define void @prop_align(ptr %v, ptr %vout) {
; CHECK-LABEL: define void @prop_align(
; CHECK-SAME: ptr [[V:%.*]], ptr [[VOUT:%.*]]) {
; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16
; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4
; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8
; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12
; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4
; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16
; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 16
; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20
; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4
; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24
; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8
; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28
; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4
; CHECK-NEXT: store float [[DOTUNPACK_UNPACK]], ptr [[VOUT]], align 16
; CHECK-NEXT: [[VOUT_REPACK23:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 4
; CHECK-NEXT: store float [[DOTUNPACK_UNPACK8]], ptr [[VOUT_REPACK23]], align 4
; CHECK-NEXT: [[VOUT_REPACK25:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 8
; CHECK-NEXT: store float [[DOTUNPACK_UNPACK10]], ptr [[VOUT_REPACK25]], align 8
; CHECK-NEXT: [[VOUT_REPACK17:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 12
; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK]], ptr [[VOUT_REPACK17]], align 4
; CHECK-NEXT: [[VOUT_REPACK17_REPACK27:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 16
; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK13]], ptr [[VOUT_REPACK17_REPACK27]], align 16
; CHECK-NEXT: [[VOUT_REPACK17_REPACK29:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 20
; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK15]], ptr [[VOUT_REPACK17_REPACK29]], align 4
; CHECK-NEXT: [[VOUT_REPACK19:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 24
; CHECK-NEXT: store i32 [[DOTUNPACK4]], ptr [[VOUT_REPACK19]], align 8
; CHECK-NEXT: [[VOUT_REPACK21:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 28
; CHECK-NEXT: store i32 [[DOTUNPACK6]], ptr [[VOUT_REPACK21]], align 4
; CHECK-NEXT: ret void
;
  %.unpack.unpack = load float, ptr %v, align 16
  %.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4
  %.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4
  %.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8
  %.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8
  %.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12
  %.unpack2.unpack = load float, ptr %.elt1, align 4
  %.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16
  %.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4
  %.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20
  %.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4
  %.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24
  %.unpack4 = load i32, ptr %.elt3, align 8
  %.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28
  %.unpack6 = load i32, ptr %.elt5, align 4
  store float %.unpack.unpack, ptr %vout, align 16
  %vout.repack23 = getelementptr inbounds nuw i8, ptr %vout, i64 4
  store float %.unpack.unpack8, ptr %vout.repack23, align 4
  %vout.repack25 = getelementptr inbounds nuw i8, ptr %vout, i64 8
  store float %.unpack.unpack10, ptr %vout.repack25, align 8
  %vout.repack17 = getelementptr inbounds nuw i8, ptr %vout, i64 12
  store float %.unpack2.unpack, ptr %vout.repack17, align 4
  %vout.repack17.repack27 = getelementptr inbounds nuw i8, ptr %vout, i64 16
  store float %.unpack2.unpack13, ptr %vout.repack17.repack27, align 4
  %vout.repack17.repack29 = getelementptr inbounds nuw i8, ptr %vout, i64 20
  store float %.unpack2.unpack15, ptr %vout.repack17.repack29, align 4
  %vout.repack19 = getelementptr inbounds nuw i8, ptr %vout, i64 24
  store i32 %.unpack4, ptr %vout.repack19, align 8
  %vout.repack21 = getelementptr inbounds nuw i8, ptr %vout, i64 28
  store i32 %.unpack6, ptr %vout.repack21, align 4
  ret void
}

; ------------------------------------------------------------------------------
; Test that alignment is not propagated from a source that does not dominate the destination
; ------------------------------------------------------------------------------

define void @no_prop_align(ptr %v, ptr %vout, i1 %cond) {
; CHECK-LABEL: define void @no_prop_align(
; CHECK-SAME: ptr [[V:%.*]], ptr [[VOUT:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: br i1 [[COND]], label %[[BRANCH1:.*]], label %[[BRANCH2:.*]]
; CHECK: [[BRANCH1]]:
; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16
; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4
; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8
; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12
; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4
; CHECK-NEXT: br label %[[END:.*]]
; CHECK: [[BRANCH2]]:
; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16
; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 4
; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20
; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4
; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24
; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8
; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28
; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
  br i1 %cond, label %branch1, label %branch2

branch1:
  %.unpack.unpack = load float, ptr %v, align 16
  %.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4
  %.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4
  %.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8
  %.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8
  %.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12
  %.unpack2.unpack = load float, ptr %.elt1, align 4
  br label %end

branch2:
  %.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16
  %.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4
  %.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20
  %.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4
  %.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24
  %.unpack4 = load i32, ptr %.elt3, align 8
  %.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28
  %.unpack6 = load i32, ptr %.elt5, align 4
  br label %end

end:
  ret void
}