
Commit 80e86c5

dakersnar authored and nikic committed
Automerge: [InferAlignment] Propagate alignment between loads/stores of the same base pointer (#145733)
We can derive and upgrade alignment for loads/stores using other well-aligned loads/stores. This optimization does a single forward pass through each basic block and uses loads/stores (the alignment and the offset) to derive the best possible alignment for a base pointer, caching the result. If it encounters another load/store based on that pointer, it tries to upgrade the alignment. The optimization must be a forward pass within a basic block because control flow and exception throwing can impact alignment guarantees.

Co-authored-by: Nikita Popov <[email protected]>
2 parents 5f67a34 + 90e8c8e commit 80e86c5
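To illustrate the arithmetic this pass relies on, here is a small standalone C++ sketch (not code from this commit; commonAlignment is re-implemented over plain integers to mirror llvm::commonAlignment): the base-pointer alignment implied by an access is the largest power of two dividing both the access's alignment and its byte offset, and a later access can only be upgraded when the cached base alignment combined with that access's offset proves more than the access already claims.

// Standalone sketch (assumed helper, not from this commit): the largest
// power of two that divides both a known alignment and a byte offset.
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint64_t commonAlignment(uint64_t Align, uint64_t Offset) {
  // Largest power of two dividing Offset; an offset of 0 imposes no limit.
  uint64_t OffsetAlign = Offset ? (Offset & (~Offset + 1)) : Align;
  return std::min(Align, OffsetAlign);
}

int main() {
  // A load of the base pointer with align 16 proves the base is 16-aligned.
  uint64_t BaseAlign = commonAlignment(16, 0); // 16

  // A later load at byte offset 16 claiming only align 4 can be upgraded...
  uint64_t UpgradedAlign = std::max<uint64_t>(4, commonAlignment(BaseAlign, 16));

  // ...but a load at offset 4 cannot: a 16-aligned base plus 4 bytes is only 4-aligned.
  uint64_t UnchangedAlign = std::max<uint64_t>(4, commonAlignment(BaseAlign, 4));

  assert(BaseAlign == 16 && UpgradedAlign == 16 && UnchangedAlign == 4);
  return 0;
}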

File tree

5 files changed: 245 additions & 10 deletions


clang/test/CodeGen/attr-counted-by-for-pointers.c

Lines changed: 4 additions & 4 deletions
@@ -32,7 +32,7 @@ struct annotated_ptr {
 // SANITIZE-WITH-ATTR-NEXT: entry:
 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
-// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2:![0-9]+]]
 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]]

@@ -85,7 +85,7 @@ void test1(struct annotated_ptr *p, int index, struct foo *value) {
 // SANITIZE-WITH-ATTR-NEXT: entry:
 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
-// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]

@@ -138,7 +138,7 @@ void test2(struct annotated_ptr *p, int index, struct foo *value) {
 // SANITIZE-WITH-ATTR-NEXT: entry:
 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
-// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT10:%.*]], !prof [[PROF15:![0-9]+]], !nosanitize [[META2]]

@@ -311,7 +311,7 @@ size_t test6(struct annotated_ptr *p, int index) {
 // SANITIZE-WITH-ATTR-NEXT: entry:
 // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
-// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
 // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]

clang/test/OpenMP/bug57757.cpp

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ void foo() {
 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
 // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48
 // CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA19:![0-9]+]], !noalias [[META13]]
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA16]], !noalias [[META13]]
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 8, !tbaa [[TBAA16]], !noalias [[META13]]
 // CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA20:![0-9]+]], !noalias [[META13]]
 // CHECK-NEXT: tail call void [[TMP8]](i32 noundef [[TMP9]], float noundef [[TMP10]]) #[[ATTR2:[0-9]+]], !noalias [[META13]]
 // CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]

llvm/lib/Transforms/Scalar/InferAlignment.cpp

Lines changed: 45 additions & 4 deletions
@@ -58,14 +58,55 @@ bool inferAlignment(Function &F, AssumptionCache &AC, DominatorTree &DT) {
   }
 
   // Compute alignment from known bits.
+  auto InferFromKnownBits = [&](Instruction &I, Value *PtrOp) {
+    KnownBits Known = computeKnownBits(PtrOp, DL, &AC, &I, &DT);
+    unsigned TrailZ =
+        std::min(Known.countMinTrailingZeros(), +Value::MaxAlignmentExponent);
+    return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+  };
+
+  // Propagate alignment between loads and stores that originate from the
+  // same base pointer.
+  DenseMap<Value *, Align> BestBasePointerAligns;
+  auto InferFromBasePointer = [&](Value *PtrOp, Align LoadStoreAlign) {
+    APInt OffsetFromBase(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
+    PtrOp = PtrOp->stripAndAccumulateConstantOffsets(DL, OffsetFromBase, true);
+    // Derive the base pointer alignment from the load/store alignment
+    // and the offset from the base pointer.
+    Align BasePointerAlign =
+        commonAlignment(LoadStoreAlign, OffsetFromBase.getLimitedValue());
+
+    auto [It, Inserted] =
+        BestBasePointerAligns.try_emplace(PtrOp, BasePointerAlign);
+    if (!Inserted) {
+      // If the stored base pointer alignment is better than the
+      // base pointer alignment we derived, we may be able to use it
+      // to improve the load/store alignment. If not, store the
+      // improved base pointer alignment for future iterations.
+      if (It->second > BasePointerAlign) {
+        Align BetterLoadStoreAlign =
+            commonAlignment(It->second, OffsetFromBase.getLimitedValue());
+        return BetterLoadStoreAlign;
+      }
+      It->second = BasePointerAlign;
+    }
+    return LoadStoreAlign;
+  };
+
   for (BasicBlock &BB : F) {
+    // We need to reset the map for each block because alignment information
+    // can only be propagated from instruction A to B if A dominates B.
+    // This is because control flow (and exception throwing) could be dependent
+    // on the address (and its alignment) at runtime. Some sort of dominator
+    // tree approach could be better, but doing a simple forward pass through a
+    // single basic block is correct too.
+    BestBasePointerAligns.clear();
+
     for (Instruction &I : BB) {
       Changed |= tryToImproveAlign(
           DL, &I, [&](Value *PtrOp, Align OldAlign, Align PrefAlign) {
-            KnownBits Known = computeKnownBits(PtrOp, DL, &AC, &I, &DT);
-            unsigned TrailZ = std::min(Known.countMinTrailingZeros(),
-                                       +Value::MaxAlignmentExponent);
-            return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
+            return std::max(InferFromKnownBits(I, PtrOp),
+                            InferFromBasePointer(PtrOp, OldAlign));
           });
     }
   }
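To make the per-block forward pass concrete, here is a hedged toy model in plain C++ (not LLVM code; the scalar BestBaseAlign and the local commonAlignment helper stand in for the BestBasePointerAligns map and llvm::commonAlignment, and a single base pointer is assumed). It replays a block's accesses in program order, upgrading an access when the cached base alignment is stronger and otherwise tightening the cache:

// Toy model of the per-block cache (assumed simplification: one base pointer,
// alignments as plain uint64_t values).
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

static uint64_t commonAlignment(uint64_t Align, uint64_t Offset) {
  // Largest power of two dividing Offset; an offset of 0 imposes no limit.
  uint64_t OffsetAlign = Offset ? (Offset & (~Offset + 1)) : Align;
  return std::min(Align, OffsetAlign);
}

int main() {
  // (byte offset from the base pointer, alignment currently on the access),
  // visited in program order within a single basic block.
  std::vector<std::pair<uint64_t, uint64_t>> Accesses = {
      {0, 16}, {8, 8}, {16, 4}, {20, 4}};

  uint64_t BestBaseAlign = 1; // reset at the start of every basic block
  for (auto &[Offset, AccessAlign] : Accesses) {
    uint64_t DerivedBaseAlign = commonAlignment(AccessAlign, Offset);
    if (BestBaseAlign > DerivedBaseAlign)
      // The cached base alignment is stronger: try to upgrade this access.
      AccessAlign = std::max(AccessAlign, commonAlignment(BestBaseAlign, Offset));
    else
      // This access proves a better base alignment: remember it.
      BestBaseAlign = DerivedBaseAlign;
    std::printf("offset %2llu -> align %llu\n",
                (unsigned long long)Offset, (unsigned long long)AccessAlign);
  }
  // Prints aligns 16, 8, 16, 4: only the offset-16 access is upgraded.
  return 0;
}

With these accesses, only the offset-16 access improves (to align 16), mirroring @prop_align in the new test below: a 16-aligned base plus 20 bytes still only guarantees 4-byte alignment.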
Lines changed: 194 additions & 0 deletions
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=infer-alignment -S | FileCheck %s
+%struct.S1 = type { %struct.float3, %struct.float3, i32, i32 }
+%struct.float3 = type { float, float, float }
+
+; ------------------------------------------------------------------------------
+; Test that we can propagate the align 16 to the load and store that are set to align 4
+; ------------------------------------------------------------------------------
+
+
+define void @prop_align(ptr %v, ptr %vout) {
+; CHECK-LABEL: define void @prop_align(
+; CHECK-SAME: ptr [[V:%.*]], ptr [[VOUT:%.*]]) {
+; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
+; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4
+; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8
+; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4
+; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 16
+; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4
+; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24
+; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8
+; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28
+; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4
+; CHECK-NEXT: store float [[DOTUNPACK_UNPACK]], ptr [[VOUT]], align 16
+; CHECK-NEXT: [[VOUT_REPACK23:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 4
+; CHECK-NEXT: store float [[DOTUNPACK_UNPACK8]], ptr [[VOUT_REPACK23]], align 4
+; CHECK-NEXT: [[VOUT_REPACK25:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 8
+; CHECK-NEXT: store float [[DOTUNPACK_UNPACK10]], ptr [[VOUT_REPACK25]], align 8
+; CHECK-NEXT: [[VOUT_REPACK17:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 12
+; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK]], ptr [[VOUT_REPACK17]], align 4
+; CHECK-NEXT: [[VOUT_REPACK17_REPACK27:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 16
+; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK13]], ptr [[VOUT_REPACK17_REPACK27]], align 16
+; CHECK-NEXT: [[VOUT_REPACK17_REPACK29:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 20
+; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK15]], ptr [[VOUT_REPACK17_REPACK29]], align 4
+; CHECK-NEXT: [[VOUT_REPACK19:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 24
+; CHECK-NEXT: store i32 [[DOTUNPACK4]], ptr [[VOUT_REPACK19]], align 8
+; CHECK-NEXT: [[VOUT_REPACK21:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 28
+; CHECK-NEXT: store i32 [[DOTUNPACK6]], ptr [[VOUT_REPACK21]], align 4
+; CHECK-NEXT: ret void
+;
+%.unpack.unpack = load float, ptr %v, align 16
+%.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4
+%.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4
+%.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8
+%.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8
+%.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12
+%.unpack2.unpack = load float, ptr %.elt1, align 4
+%.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16
+%.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4
+%.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20
+%.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4
+%.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24
+%.unpack4 = load i32, ptr %.elt3, align 8
+%.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28
+%.unpack6 = load i32, ptr %.elt5, align 4
+store float %.unpack.unpack, ptr %vout, align 16
+%vout.repack23 = getelementptr inbounds nuw i8, ptr %vout, i64 4
+store float %.unpack.unpack8, ptr %vout.repack23, align 4
+%vout.repack25 = getelementptr inbounds nuw i8, ptr %vout, i64 8
+store float %.unpack.unpack10, ptr %vout.repack25, align 8
+%vout.repack17 = getelementptr inbounds nuw i8, ptr %vout, i64 12
+store float %.unpack2.unpack, ptr %vout.repack17, align 4
+%vout.repack17.repack27 = getelementptr inbounds nuw i8, ptr %vout, i64 16
+store float %.unpack2.unpack13, ptr %vout.repack17.repack27, align 4
+%vout.repack17.repack29 = getelementptr inbounds nuw i8, ptr %vout, i64 20
+store float %.unpack2.unpack15, ptr %vout.repack17.repack29, align 4
+%vout.repack19 = getelementptr inbounds nuw i8, ptr %vout, i64 24
+store i32 %.unpack4, ptr %vout.repack19, align 8
+%vout.repack21 = getelementptr inbounds nuw i8, ptr %vout, i64 28
+store i32 %.unpack6, ptr %vout.repack21, align 4
+ret void
+}
+
+; ------------------------------------------------------------------------------
+; Test that alignment is not propagated from a source that does not dominate the destination
+; ------------------------------------------------------------------------------
+
+define void @no_prop_align(ptr %v, ptr %vout, i1 %cond) {
+; CHECK-LABEL: define void @no_prop_align(
+; CHECK-SAME: ptr [[V:%.*]], ptr [[VOUT:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: br i1 [[COND]], label %[[BRANCH1:.*]], label %[[BRANCH2:.*]]
+; CHECK: [[BRANCH1]]:
+; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
+; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4
+; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8
+; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4
+; CHECK-NEXT: br label %[[END:.*]]
+; CHECK: [[BRANCH2]]:
+; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 4
+; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4
+; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24
+; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8
+; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28
+; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+br i1 %cond, label %branch1, label %branch2
+
+branch1:
+%.unpack.unpack = load float, ptr %v, align 16
+%.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4
+%.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4
+%.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8
+%.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8
+%.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12
+%.unpack2.unpack = load float, ptr %.elt1, align 4
+br label %end
+
+branch2:
+%.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16
+%.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4
+%.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20
+%.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4
+%.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24
+%.unpack4 = load i32, ptr %.elt3, align 8
+%.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28
+%.unpack6 = load i32, ptr %.elt5, align 4
+br label %end
+
+end:
+ret void
+}
+
+; ------------------------------------------------------------------------------
+; Test that we can propagate to/from negative offset GEPs
+; ------------------------------------------------------------------------------
+
+define void @prop_align_negative_offset(ptr %v) {
+; CHECK-LABEL: define void @prop_align_negative_offset(
+; CHECK-SAME: ptr [[V:%.*]]) {
+; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -16
+; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 16
+; CHECK-NEXT: ret void
+;
+%loadAligned= load float, ptr %v, align 16
+%gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -16
+%loadUnaligned = load float, ptr %gepNegative, align 4
+ret void
+}
+
+define void @prop_align_negative_offset_2(ptr %v) {
+; CHECK-LABEL: define void @prop_align_negative_offset_2(
+; CHECK-SAME: ptr [[V:%.*]]) {
+; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -16
+; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 16
+; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: ret void
+;
+%gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -16
+%loadAligned = load float, ptr %gepNegative, align 16
+%loadUnaligned= load float, ptr %v, align 4
+ret void
+}
+
+define void @prop_align_negative_offset_3(ptr %v) {
+; CHECK-LABEL: define void @prop_align_negative_offset_3(
+; CHECK-SAME: ptr [[V:%.*]]) {
+; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -8
+; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 8
+; CHECK-NEXT: ret void
+;
+%loadAligned= load float, ptr %v, align 16
+%gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -8
+%loadUnaligned = load float, ptr %gepNegative, align 4
+ret void
+}
+
+define void @prop_align_negative_offset_4(ptr %v) {
+; CHECK-LABEL: define void @prop_align_negative_offset_4(
+; CHECK-SAME: ptr [[V:%.*]]) {
+; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -20
+; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 4
+; CHECK-NEXT: ret void
+;
+%loadAligned= load float, ptr %v, align 16
+%gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -20
+%loadUnaligned = load float, ptr %gepNegative, align 4
+ret void
+}

llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
 ; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
-; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
+; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 8, <1 x i1> [[TMP0]])
 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
 ; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
