[InferAlignment] Propagate alignment between loads/stores of the same base pointer #145733

Merged

8 changes: 4 additions & 4 deletions clang/test/CodeGen/attr-counted-by-for-pointers.c
@@ -32,7 +32,7 @@ struct annotated_ptr {
// SANITIZE-WITH-ATTR-NEXT: entry:
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2:![0-9]+]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]]
@@ -85,7 +85,7 @@ void test1(struct annotated_ptr *p, int index, struct foo *value) {
// SANITIZE-WITH-ATTR-NEXT: entry:
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
@@ -138,7 +138,7 @@ void test2(struct annotated_ptr *p, int index, struct foo *value) {
// SANITIZE-WITH-ATTR-NEXT: entry:
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT10:%.*]], !prof [[PROF15:![0-9]+]], !nosanitize [[META2]]
@@ -311,7 +311,7 @@ size_t test6(struct annotated_ptr *p, int index) {
// SANITIZE-WITH-ATTR-NEXT: entry:
// SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 8
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT10:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
2 changes: 1 addition & 1 deletion clang/test/OpenMP/bug57757.cpp
@@ -46,7 +46,7 @@ void foo() {
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 48
// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA19:![0-9]+]], !noalias [[META13]]
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA16]], !noalias [[META13]]
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 8, !tbaa [[TBAA16]], !noalias [[META13]]
// CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA20:![0-9]+]], !noalias [[META13]]
// CHECK-NEXT: tail call void [[TMP8]](i32 noundef [[TMP9]], float noundef [[TMP10]]) #[[ATTR2:[0-9]+]], !noalias [[META13]]
// CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
49 changes: 45 additions & 4 deletions llvm/lib/Transforms/Scalar/InferAlignment.cpp
@@ -58,14 +58,55 @@ bool inferAlignment(Function &F, AssumptionCache &AC, DominatorTree &DT) {
}

// Compute alignment from known bits.
auto InferFromKnownBits = [&](Instruction &I, Value *PtrOp) {
KnownBits Known = computeKnownBits(PtrOp, DL, &AC, &I, &DT);
unsigned TrailZ =
std::min(Known.countMinTrailingZeros(), +Value::MaxAlignmentExponent);
return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
};

// Propagate alignment between loads and stores that originate from the
// same base pointer.
DenseMap<Value *, Align> BestBasePointerAligns;
auto InferFromBasePointer = [&](Value *PtrOp, Align LoadStoreAlign) {
APInt OffsetFromBase(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
PtrOp = PtrOp->stripAndAccumulateConstantOffsets(DL, OffsetFromBase, true);
// Derive the base pointer alignment from the load/store alignment
// and the offset from the base pointer.
Align BasePointerAlign =
commonAlignment(LoadStoreAlign, OffsetFromBase.getLimitedValue());

auto [It, Inserted] =
BestBasePointerAligns.try_emplace(PtrOp, BasePointerAlign);
if (!Inserted) {
// If the stored base pointer alignment is better than the
// base pointer alignment we derived, we may be able to use it
// to improve the load/store alignment. If not, store the
// improved base pointer alignment for future iterations.
if (It->second > BasePointerAlign) {
Align BetterLoadStoreAlign =
commonAlignment(It->second, OffsetFromBase.getLimitedValue());
return BetterLoadStoreAlign;
}
It->second = BasePointerAlign;
}
return LoadStoreAlign;
};

for (BasicBlock &BB : F) {
// We need to reset the map for each block because alignment information

Collaborator

FYI, the problem here is worse than described. The code as written appears to be correct; I'm just pointing out a conceptual problem that may need to be reflected in the comments, etc.

Consider this:

%v = load i8, ptr %p, align 4
call void @throw_if_unaligned(%p, 16)
store i8 %v, ptr %p, align 16

Propagating the alignment forward is sound, but propagating it backwards (over the possibly throwing call) is not.

Contributor Author

Good call out, thank you for pointing that out! This tracks with my understanding of why a backwards propagation worked in the LSV but doesn't work here: the LSV analyzes within the scope of what it calls a "pseudo basic block", which is defined as follows:

  /// Runs the vectorizer on a "pseudo basic block", which is a range of
  /// instructions [Begin, End) within one BB all of which have
  /// isGuaranteedToTransferExecutionToSuccessor(I) == true.

Anyway, I can adjust the comment to call out your example. And just to confirm, the hypothetical dominator tree approach described in my comment would still be correct, right?

Contributor Author

I updated the comment. Let me know if it looks accurate.

// can't be propagated across blocks. This is because control flow could
// be dependent on the address at runtime, making an alignment assumption
// within one block not true in another. Some sort of dominator tree
// approach could be better, but restricting within a basic block is correct
// too.
BestBasePointerAligns.clear();

for (Instruction &I : BB) {
Changed |= tryToImproveAlign(
DL, &I, [&](Value *PtrOp, Align OldAlign, Align PrefAlign) {
KnownBits Known = computeKnownBits(PtrOp, DL, &AC, &I, &DT);
unsigned TrailZ = std::min(Known.countMinTrailingZeros(),
+Value::MaxAlignmentExponent);
return Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
return std::max(InferFromKnownBits(I, PtrOp),
InferFromBasePointer(PtrOp, OldAlign));
});
}
}
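
To make the new logic easier to follow, here is a minimal standalone model of the per-block bookkeeping that InferFromBasePointer performs. It is a sketch only: the Access struct, the string key standing in for the stripped base pointer, the plain-integer alignments, and the toy commonAlign are illustrative rather than the pass's actual types, and the example offsets are borrowed from the @prop_align test below.

```cpp
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

// Largest power of two dividing both values (a stand-in for LLVM's
// commonAlignment/MinAlign).
static uint64_t commonAlign(uint64_t Align, uint64_t Offset) {
  uint64_t Bits = Align | Offset;
  return Bits & -Bits;
}

struct Access {
  std::string Base; // stripped base pointer (illustrative key)
  uint64_t Offset;  // constant byte offset from that base
  uint64_t Align;   // alignment currently attached to the load/store
};

int main() {
  // Borrowed from @prop_align: the access at offset 0 is align 16, so the
  // offset-16 access can be raised from 4 to commonAlign(16, 16) = 16, while
  // the offset-20 access only gets commonAlign(16, 20) = 4 and stays put.
  Access Accesses[] = {{"v", 0, 16}, {"v", 16, 4}, {"v", 20, 4}};

  std::map<std::string, uint64_t> BestBaseAlign; // reset per basic block
  for (Access &A : Accesses) {
    // Base-pointer alignment implied by this access.
    uint64_t BaseAlign = commonAlign(A.Align, A.Offset);
    auto [It, Inserted] = BestBaseAlign.try_emplace(A.Base, BaseAlign);
    if (!Inserted) {
      if (It->second > BaseAlign) {
        // An earlier access proved a stronger base alignment: improve this one.
        A.Align = commonAlign(It->second, A.Offset);
      } else {
        // This access proves at least as strong a base alignment: remember it.
        It->second = BaseAlign;
      }
    }
    std::printf("offset %llu -> align %llu\n",
                (unsigned long long)A.Offset, (unsigned long long)A.Align);
  }
  return 0;
}
```

In the pass itself this estimate is computed inside the tryToImproveAlign callback and combined with the known-bits result via std::max, as shown in the hunk above.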
@@ -0,0 +1,194 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=infer-alignment -S | FileCheck %s
%struct.S1 = type { %struct.float3, %struct.float3, i32, i32 }
%struct.float3 = type { float, float, float }


; ------------------------------------------------------------------------------
; Test that we can propagate the align 16 to the load and store that are set to align 4
; ------------------------------------------------------------------------------

define void @prop_align(ptr %v, ptr %vout) {
; CHECK-LABEL: define void @prop_align(
; CHECK-SAME: ptr [[V:%.*]], ptr [[VOUT:%.*]]) {
; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16
; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4
; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8
; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12
; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4
; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16
; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 16
; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20
; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4
; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24
; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8
; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28
; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4
; CHECK-NEXT: store float [[DOTUNPACK_UNPACK]], ptr [[VOUT]], align 16
; CHECK-NEXT: [[VOUT_REPACK23:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 4
; CHECK-NEXT: store float [[DOTUNPACK_UNPACK8]], ptr [[VOUT_REPACK23]], align 4
; CHECK-NEXT: [[VOUT_REPACK25:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 8
; CHECK-NEXT: store float [[DOTUNPACK_UNPACK10]], ptr [[VOUT_REPACK25]], align 8
; CHECK-NEXT: [[VOUT_REPACK17:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 12
; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK]], ptr [[VOUT_REPACK17]], align 4
; CHECK-NEXT: [[VOUT_REPACK17_REPACK27:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 16
; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK13]], ptr [[VOUT_REPACK17_REPACK27]], align 16
; CHECK-NEXT: [[VOUT_REPACK17_REPACK29:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 20
; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK15]], ptr [[VOUT_REPACK17_REPACK29]], align 4
; CHECK-NEXT: [[VOUT_REPACK19:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 24
; CHECK-NEXT: store i32 [[DOTUNPACK4]], ptr [[VOUT_REPACK19]], align 8
; CHECK-NEXT: [[VOUT_REPACK21:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 28
; CHECK-NEXT: store i32 [[DOTUNPACK6]], ptr [[VOUT_REPACK21]], align 4
; CHECK-NEXT: ret void
;
%.unpack.unpack = load float, ptr %v, align 16
%.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4
%.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4
%.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8
%.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8
%.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12
%.unpack2.unpack = load float, ptr %.elt1, align 4
%.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16
%.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4
%.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20
%.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4
%.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24
%.unpack4 = load i32, ptr %.elt3, align 8
%.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28
%.unpack6 = load i32, ptr %.elt5, align 4
store float %.unpack.unpack, ptr %vout, align 16
%vout.repack23 = getelementptr inbounds nuw i8, ptr %vout, i64 4
store float %.unpack.unpack8, ptr %vout.repack23, align 4
%vout.repack25 = getelementptr inbounds nuw i8, ptr %vout, i64 8
store float %.unpack.unpack10, ptr %vout.repack25, align 8
%vout.repack17 = getelementptr inbounds nuw i8, ptr %vout, i64 12
store float %.unpack2.unpack, ptr %vout.repack17, align 4
%vout.repack17.repack27 = getelementptr inbounds nuw i8, ptr %vout, i64 16
store float %.unpack2.unpack13, ptr %vout.repack17.repack27, align 4
%vout.repack17.repack29 = getelementptr inbounds nuw i8, ptr %vout, i64 20
store float %.unpack2.unpack15, ptr %vout.repack17.repack29, align 4
%vout.repack19 = getelementptr inbounds nuw i8, ptr %vout, i64 24
store i32 %.unpack4, ptr %vout.repack19, align 8
%vout.repack21 = getelementptr inbounds nuw i8, ptr %vout, i64 28
store i32 %.unpack6, ptr %vout.repack21, align 4
ret void
}

; ------------------------------------------------------------------------------
; Test that alignment is not propagated from a source that does not dominate the destination
; ------------------------------------------------------------------------------

define void @no_prop_align(ptr %v, ptr %vout, i1 %cond) {
; CHECK-LABEL: define void @no_prop_align(
; CHECK-SAME: ptr [[V:%.*]], ptr [[VOUT:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: br i1 [[COND]], label %[[BRANCH1:.*]], label %[[BRANCH2:.*]]
; CHECK: [[BRANCH1]]:
; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16
; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4
; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8
; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12
; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4
; CHECK-NEXT: br label %[[END:.*]]
; CHECK: [[BRANCH2]]:
; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16
; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 4
; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20
; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4
; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24
; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8
; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28
; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
br i1 %cond, label %branch1, label %branch2

branch1:
%.unpack.unpack = load float, ptr %v, align 16
%.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4
%.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4
%.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8
%.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8
%.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12
%.unpack2.unpack = load float, ptr %.elt1, align 4
br label %end

branch2:
%.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16
%.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4
%.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20
%.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4
%.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24
%.unpack4 = load i32, ptr %.elt3, align 8
%.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28
%.unpack6 = load i32, ptr %.elt5, align 4
br label %end

end:
ret void
}

; ------------------------------------------------------------------------------
; Test that we can propagate to/from negative offset GEPs
; ------------------------------------------------------------------------------

define void @prop_align_negative_offset(ptr %v) {
; CHECK-LABEL: define void @prop_align_negative_offset(
; CHECK-SAME: ptr [[V:%.*]]) {
; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16
; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -16
; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 16
; CHECK-NEXT: ret void
;
%loadAligned= load float, ptr %v, align 16
%gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -16
%loadUnaligned = load float, ptr %gepNegative, align 4
ret void
}

define void @prop_align_negative_offset_2(ptr %v) {
; CHECK-LABEL: define void @prop_align_negative_offset_2(
; CHECK-SAME: ptr [[V:%.*]]) {
; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -16
; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 16
; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[V]], align 16
; CHECK-NEXT: ret void
;
%gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -16
%loadAligned = load float, ptr %gepNegative, align 16
%loadUnaligned= load float, ptr %v, align 4
ret void
}

define void @prop_align_negative_offset_3(ptr %v) {
; CHECK-LABEL: define void @prop_align_negative_offset_3(
; CHECK-SAME: ptr [[V:%.*]]) {
; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16
; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -8
; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 8
; CHECK-NEXT: ret void
;
%loadAligned= load float, ptr %v, align 16
%gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -8
%loadUnaligned = load float, ptr %gepNegative, align 4
ret void
}

define void @prop_align_negative_offset_4(ptr %v) {
; CHECK-LABEL: define void @prop_align_negative_offset_4(
; CHECK-SAME: ptr [[V:%.*]]) {
; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16
; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -20
; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 4
; CHECK-NEXT: ret void
;
%loadAligned= load float, ptr %v, align 16
%gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -20
%loadUnaligned = load float, ptr %gepNegative, align 4
ret void
}
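
The negative-offset cases above fall out of the same arithmetic: the accumulated offset reaches commonAlignment through APInt::getLimitedValue(), i.e. as an unsigned value with the offset's two's-complement bit pattern, and that pattern has the same number of trailing zeros as the offset's magnitude. A small illustrative check (plain C++, same toy commonAlign as in the earlier sketch, not the pass's code):

```cpp
#include <cassert>
#include <cstdint>

// Largest power of two dividing both values (toy model of commonAlignment).
static uint64_t commonAlign(uint64_t Align, uint64_t Offset) {
  uint64_t Bits = Align | Offset;
  return Bits & -Bits;
}

int main() {
  // -16 and -20 as the unsigned values getLimitedValue() would hand back.
  uint64_t MinusSixteen = static_cast<uint64_t>(int64_t{-16}); // 0xFFFF...FFF0
  uint64_t MinusTwenty  = static_cast<uint64_t>(int64_t{-20}); // 0xFFFF...FFEC
  assert(commonAlign(16, MinusSixteen) == 16); // @prop_align_negative_offset
  assert(commonAlign(16, MinusTwenty) == 4);   // @prop_align_negative_offset_4
  return 0;
}
```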
@@ -13,7 +13,7 @@ define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 8, <1 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>