Skip to content

Commit 8db304a

Browse files
committed
Alternate approach to solve the problem in InferAlignment
1 parent 9c99e0a commit 8db304a

File tree

2 files changed

+177
-0
lines changed

2 files changed

+177
-0
lines changed

llvm/lib/Transforms/Scalar/InferAlignment.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,43 @@
2121

2222
using namespace llvm;
2323

24+
/// Propagate alignment between loads and stores in \p F that access constant
/// in-bounds offsets from the same base pointer.
///
/// For every load/store, the alignment implied for the underlying base pointer
/// is derived from the access alignment and its accumulated constant offset.
/// If an earlier access in the same basic block already implied a stronger
/// base-pointer alignment, that alignment (combined with the offset) is used
/// to raise the alignment of the current access. Alignments are only ever
/// increased, never reduced.
///
/// \param F  Function whose loads and stores are visited.
/// \param DL Data layout used to size the offset and to strip constant
///           offsets from the pointer operand.
/// \returns true if the alignment of at least one instruction was increased.
static bool tryToPropagateAlign(Function &F, const DataLayout &DL) {
  bool Changed = false;

  for (BasicBlock &BB : F) {
    // We need to reset the map for each block because alignment information
    // can't be propagated across blocks. This is because control flow could
    // be dependent on the address at runtime, making an alignment assumption
    // within one block not true in another. Some sort of dominator tree
    // approach could be better, but restricting within a basic block is
    // correct too.
    DenseMap<Value *, Align> BestBasePointerAligns;
    for (Instruction &I : BB) {
      auto *PtrOp = getLoadStorePointerOperand(&I);
      if (!PtrOp)
        continue; // Not a load or store.

      const Align LoadStoreAlign = getLoadStoreAlignment(&I);
      APInt OffsetFromBase(
          DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace()),
          0);
      auto *BasePtr =
          PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetFromBase);
      const uint64_t Offset = OffsetFromBase.getLimitedValue();

      // Alignment of the base pointer implied by this access alone.
      const Align BasePointerAlign = commonAlignment(LoadStoreAlign, Offset);

      // Single lookup: either record this base pointer for the first time or
      // get a handle to the best alignment seen so far in this block.
      auto [It, Inserted] =
          BestBasePointerAligns.try_emplace(BasePtr, BasePointerAlign);
      if (Inserted)
        continue;

      if (It->second > BasePointerAlign) {
        // An earlier access implied a stronger base alignment; see whether it
        // actually improves this access once the offset is factored back in.
        const Align BetterLoadStoreAlign = commonAlignment(It->second, Offset);
        // Only touch the instruction (and report a change) on a strict
        // improvement; the offset may cap the result at or below the
        // alignment the access already has.
        if (BetterLoadStoreAlign > LoadStoreAlign) {
          setLoadStoreAlignment(&I, BetterLoadStoreAlign);
          Changed = true;
        }
      } else {
        // This access implies an alignment at least as good as the recorded
        // one; remember it for subsequent accesses in the block.
        It->second = BasePointerAlign;
      }
    }
  }
  return Changed;
}
60+
2461
static bool tryToImproveAlign(
2562
const DataLayout &DL, Instruction *I,
2663
function_ref<Align(Value *PtrOp, Align OldAlign, Align PrefAlign)> Fn) {
@@ -70,6 +107,10 @@ bool inferAlignment(Function &F, AssumptionCache &AC, DominatorTree &DT) {
70107
}
71108
}
72109

110+
// Propagate alignment between loads and stores that originate from the same
111+
// base pointer
112+
Changed |= tryToPropagateAlign(F, DL);
113+
73114
return Changed;
74115
}
75116

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=infer-alignment -S | FileCheck %s
3+
%struct.S1 = type { %struct.float3, %struct.float3, i32, i32 }
4+
%struct.float3 = type { float, float, float }
5+
6+
7+
; ------------------------------------------------------------------------------
8+
; Test that we can propagate the align 16 to the load and store that are set to align 4
9+
; ------------------------------------------------------------------------------
10+
11+
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
12+
define void @prop_align(ptr noundef readonly captures(none) %v, ptr noundef writeonly captures(none) initializes((0, 32)) %vout) local_unnamed_addr #0 {
13+
; CHECK-LABEL: define void @prop_align(
14+
; CHECK-SAME: ptr noundef readonly captures(none) [[V:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[VOUT:%.*]]) local_unnamed_addr {
15+
; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16
16+
; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
17+
; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4
18+
; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
19+
; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8
20+
; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12
21+
; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4
22+
; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16
23+
; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 16
24+
; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20
25+
; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4
26+
; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24
27+
; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8
28+
; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28
29+
; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4
30+
; CHECK-NEXT: store float [[DOTUNPACK_UNPACK]], ptr [[VOUT]], align 16
31+
; CHECK-NEXT: [[VOUT_REPACK23:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 4
32+
; CHECK-NEXT: store float [[DOTUNPACK_UNPACK8]], ptr [[VOUT_REPACK23]], align 4
33+
; CHECK-NEXT: [[VOUT_REPACK25:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 8
34+
; CHECK-NEXT: store float [[DOTUNPACK_UNPACK10]], ptr [[VOUT_REPACK25]], align 8
35+
; CHECK-NEXT: [[VOUT_REPACK17:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 12
36+
; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK]], ptr [[VOUT_REPACK17]], align 4
37+
; CHECK-NEXT: [[VOUT_REPACK17_REPACK27:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 16
38+
; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK13]], ptr [[VOUT_REPACK17_REPACK27]], align 16
39+
; CHECK-NEXT: [[VOUT_REPACK17_REPACK29:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 20
40+
; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK15]], ptr [[VOUT_REPACK17_REPACK29]], align 4
41+
; CHECK-NEXT: [[VOUT_REPACK19:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 24
42+
; CHECK-NEXT: store i32 [[DOTUNPACK4]], ptr [[VOUT_REPACK19]], align 8
43+
; CHECK-NEXT: [[VOUT_REPACK21:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 28
44+
; CHECK-NEXT: store i32 [[DOTUNPACK6]], ptr [[VOUT_REPACK21]], align 4
45+
; CHECK-NEXT: ret void
46+
;
47+
%.unpack.unpack = load float, ptr %v, align 16
48+
%.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4
49+
%.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4
50+
%.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8
51+
%.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8
52+
%.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12
53+
%.unpack2.unpack = load float, ptr %.elt1, align 4
54+
%.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16
55+
%.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4
56+
%.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20
57+
%.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4
58+
%.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24
59+
%.unpack4 = load i32, ptr %.elt3, align 8
60+
%.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28
61+
%.unpack6 = load i32, ptr %.elt5, align 4
62+
store float %.unpack.unpack, ptr %vout, align 16
63+
%vout.repack23 = getelementptr inbounds nuw i8, ptr %vout, i64 4
64+
store float %.unpack.unpack8, ptr %vout.repack23, align 4
65+
%vout.repack25 = getelementptr inbounds nuw i8, ptr %vout, i64 8
66+
store float %.unpack.unpack10, ptr %vout.repack25, align 8
67+
%vout.repack17 = getelementptr inbounds nuw i8, ptr %vout, i64 12
68+
store float %.unpack2.unpack, ptr %vout.repack17, align 4
69+
%vout.repack17.repack27 = getelementptr inbounds nuw i8, ptr %vout, i64 16
70+
store float %.unpack2.unpack13, ptr %vout.repack17.repack27, align 4
71+
%vout.repack17.repack29 = getelementptr inbounds nuw i8, ptr %vout, i64 20
72+
store float %.unpack2.unpack15, ptr %vout.repack17.repack29, align 4
73+
%vout.repack19 = getelementptr inbounds nuw i8, ptr %vout, i64 24
74+
store i32 %.unpack4, ptr %vout.repack19, align 8
75+
%vout.repack21 = getelementptr inbounds nuw i8, ptr %vout, i64 28
76+
store i32 %.unpack6, ptr %vout.repack21, align 4
77+
ret void
78+
}
79+
80+
; ------------------------------------------------------------------------------
81+
; Test that alignment is not propagated from a source that does not dominate the destination
82+
; ------------------------------------------------------------------------------
83+
84+
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
85+
define void @no_prop_align(ptr noundef readonly captures(none) %v, ptr noundef writeonly captures(none) initializes((0, 32)) %vout, i1 %cond) local_unnamed_addr #0 {
86+
; CHECK-LABEL: define void @no_prop_align(
87+
; CHECK-SAME: ptr noundef readonly captures(none) [[V:%.*]], ptr noundef writeonly captures(none) initializes((0, 32)) [[VOUT:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
88+
; CHECK-NEXT: br i1 [[COND]], label %[[BRANCH1:.*]], label %[[BRANCH2:.*]]
89+
; CHECK: [[BRANCH1]]:
90+
; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16
91+
; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
92+
; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4
93+
; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
94+
; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8
95+
; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12
96+
; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4
97+
; CHECK-NEXT: br label %[[END:.*]]
98+
; CHECK: [[BRANCH2]]:
99+
; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16
100+
; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 4
101+
; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20
102+
; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4
103+
; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24
104+
; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8
105+
; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28
106+
; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4
107+
; CHECK-NEXT: br label %[[END]]
108+
; CHECK: [[END]]:
109+
; CHECK-NEXT: ret void
110+
;
111+
br i1 %cond, label %branch1, label %branch2
112+
113+
branch1:
114+
%.unpack.unpack = load float, ptr %v, align 16
115+
%.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4
116+
%.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4
117+
%.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8
118+
%.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8
119+
%.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12
120+
%.unpack2.unpack = load float, ptr %.elt1, align 4
121+
br label %end
122+
123+
branch2:
124+
%.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16
125+
%.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4
126+
%.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20
127+
%.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4
128+
%.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24
129+
%.unpack4 = load i32, ptr %.elt3, align 8
130+
%.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28
131+
%.unpack6 = load i32, ptr %.elt5, align 4
132+
br label %end
133+
134+
end:
135+
ret void
136+
}

0 commit comments

Comments
 (0)