Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 39 additions & 21 deletions llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -886,35 +886,20 @@ static std::optional<PartStore> matchPartStore(Instruction &I,
return {{PtrBase, PtrOffset, Val, ValOffset, ValWidth, Store}};
}

static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
const DataLayout &DL, TargetTransformInfo &TTI) {
static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts,
unsigned Width, const DataLayout &DL,
TargetTransformInfo &TTI) {
if (Parts.size() < 2)
return false;

// We now have multiple parts of the same value stored to the same pointer.
// Sort the parts by pointer offset, and make sure they are consistent with
// the value offsets. Also check that the value is fully covered without
// overlaps.
// FIXME: We could support merging stores for only part of the value here.
llvm::sort(Parts);
int64_t LastEndOffsetFromFirst = 0;
const PartStore &First = Parts[0];
for (const PartStore &Part : Parts) {
APInt PtrOffsetFromFirst = Part.PtrOffset - First.PtrOffset;
int64_t ValOffsetFromFirst = Part.ValOffset - First.ValOffset;
if (PtrOffsetFromFirst * 8 != ValOffsetFromFirst ||
LastEndOffsetFromFirst != ValOffsetFromFirst)
return false;
LastEndOffsetFromFirst = ValOffsetFromFirst + Part.ValWidth;
}

// Check whether combining the stores is profitable.
// FIXME: We could generate smaller stores if we can't produce a large one.
const PartStore &First = Parts.front();
LLVMContext &Ctx = First.Store->getContext();
Type *NewTy = Type::getIntNTy(Ctx, LastEndOffsetFromFirst);
Type *NewTy = Type::getIntNTy(Ctx, Width);
unsigned Fast = 0;
if (!TTI.isTypeLegal(NewTy) ||
!TTI.allowsMisalignedMemoryAccesses(Ctx, LastEndOffsetFromFirst,
!TTI.allowsMisalignedMemoryAccesses(Ctx, Width,
First.Store->getPointerAddressSpace(),
First.Store->getAlign(), &Fast) ||
!Fast)
Expand All @@ -941,6 +926,39 @@ static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
return true;
}

/// Merge partial stores of one value into wider stores where possible.
///
/// \p Parts holds stores of pieces of the same value to the same base
/// pointer. The parts are sorted and then walked once, grouping them into
/// maximal runs in which every part starts exactly where the previous one
/// ended, both in memory and within the value. Each run is handed to
/// mergeConsecutivePartStores() together with its accumulated width, so gaps
/// or inconsistencies between parts split the work into several smaller
/// merges instead of abandoning the transform altogether.
///
/// \returns true if at least one run was merged.
static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
                            const DataLayout &DL, TargetTransformInfo &TTI) {
  // A single part can never be merged with anything.
  if (Parts.size() < 2)
    return false;

  llvm::sort(Parts);

  bool MadeChange = false;
  // Start of the run currently being accumulated, and the width that run has
  // covered so far, measured from the value offset of its first part.
  const PartStore *RunBegin = Parts.begin();
  int64_t RunWidth = 0;

  for (const PartStore &Cur : Parts) {
    APInt PtrDelta = Cur.PtrOffset - RunBegin->PtrOffset;
    int64_t ValDelta = Cur.ValOffset - RunBegin->ValOffset;

    // The part extends the run only if its pointer delta (scaled by 8, i.e.
    // bytes to bits) matches its value delta, and it begins exactly at the
    // end of the run.
    bool ExtendsRun = PtrDelta * 8 == ValDelta && RunWidth == ValDelta;
    if (ExtendsRun) {
      RunWidth = ValDelta + Cur.ValWidth;
      continue;
    }

    // The run is broken: flush everything before Cur, then open a new run
    // that starts with Cur.
    MadeChange |= mergeConsecutivePartStores(ArrayRef(RunBegin, &Cur),
                                             RunWidth, DL, TTI);
    RunBegin = &Cur;
    RunWidth = Cur.ValWidth;
  }

  // Flush the trailing run.
  MadeChange |= mergeConsecutivePartStores(ArrayRef(RunBegin, Parts.end()),
                                           RunWidth, DL, TTI);
  return MadeChange;
}

static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
TargetTransformInfo &TTI, AliasAnalysis &AA) {
// FIXME: Add big endian support.
Expand Down
99 changes: 99 additions & 0 deletions llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,105 @@ define void @test_i32_tbaa(i32 %x, ptr %p) {
ret void
}

; Bytes 0 and 1 of %x are stored to %p+0 and %p+1, and byte 3 is stored to
; %p+3; byte 2 is never stored. The expected output merges the first two
; stores into a single i16 store and leaves the byte-3 store as an i8 store.
define void @test_multiple_parts_with_gap1(i32 %x, ptr %p) {
; CHECK-LABEL: define void @test_multiple_parts_with_gap1(
; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X]] to i16
; CHECK-NEXT: store i16 [[TMP1]], ptr [[P]], align 1
; CHECK-NEXT: [[SHR_3:%.*]] = lshr i32 [[X]], 24
; CHECK-NEXT: [[X_3:%.*]] = trunc i32 [[SHR_3]] to i8
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[P]], i64 3
; CHECK-NEXT: store i8 [[X_3]], ptr [[GEP_3]], align 1
; CHECK-NEXT: ret void
;
%x.0 = trunc i32 %x to i8
store i8 %x.0, ptr %p
%shr.1 = lshr i32 %x, 8
%x.1 = trunc i32 %shr.1 to i8
%gep.1 = getelementptr i8, ptr %p, i64 1
store i8 %x.1, ptr %gep.1
%shr.3 = lshr i32 %x, 24
%x.3 = trunc i32 %shr.3 to i8
%gep.3 = getelementptr i8, ptr %p, i64 3
store i8 %x.3, ptr %gep.3
ret void
}

; The pointer offsets are contiguous (%p+0, %p+1, %p+2) but the stored value
; bytes are 0, 2 and 3 of %x, so byte 1 of the value is skipped. The expected
; output keeps the byte-0 store as an i8 store and merges bytes 2 and 3 into a
; single i16 store at %p+1.
define void @test_multiple_parts_with_gap2(i32 %x, ptr %p) {
; CHECK-LABEL: define void @test_multiple_parts_with_gap2(
; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) {
; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X]] to i8
; CHECK-NEXT: store i8 [[X_0]], ptr [[P]], align 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[P]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 16
; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT: store i16 [[TMP2]], ptr [[GEP_2]], align 1
; CHECK-NEXT: ret void
;
%x.0 = trunc i32 %x to i8
store i8 %x.0, ptr %p
%shr.2 = lshr i32 %x, 16
%x.2 = trunc i32 %shr.2 to i8
%gep.2 = getelementptr i8, ptr %p, i64 1
store i8 %x.2, ptr %gep.2
%shr.3 = lshr i32 %x, 24
%x.3 = trunc i32 %shr.3 to i8
%gep.3 = getelementptr i8, ptr %p, i64 2
store i8 %x.3, ptr %gep.3
ret void
}

; Bytes 0 and 1 of the i64 %x are stored to %p+0 and %p+1, and bytes 3 and 4
; are stored to %p+3 and %p+4; byte 2 is never stored. The expected output
; contains two i16 stores, one for each contiguous pair.
define void @test_multiple_parts_with_gap3(i64 %x, ptr %p) {
; CHECK-LABEL: define void @test_multiple_parts_with_gap3(
; CHECK-SAME: i64 [[X:%.*]], ptr [[P:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X]] to i16
; CHECK-NEXT: store i16 [[TMP1]], ptr [[P]], align 1
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[P]], i64 3
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[X]], 24
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i16
; CHECK-NEXT: store i16 [[TMP3]], ptr [[GEP_3]], align 1
; CHECK-NEXT: ret void
;
%x.0 = trunc i64 %x to i8
store i8 %x.0, ptr %p
%shr.1 = lshr i64 %x, 8
%x.1 = trunc i64 %shr.1 to i8
%gep.1 = getelementptr i8, ptr %p, i64 1
store i8 %x.1, ptr %gep.1
%shr.3 = lshr i64 %x, 24
%x.3 = trunc i64 %shr.3 to i8
%gep.3 = getelementptr i8, ptr %p, i64 3
store i8 %x.3, ptr %gep.3
%shr.4 = lshr i64 %x, 32
%x.4 = trunc i64 %shr.4 to i8
%gep.4 = getelementptr i8, ptr %p, i64 4
store i8 %x.4, ptr %gep.4
ret void
}

; Bytes 0 and 1 of %x are stored to %p+0 and %p+1, and then the very same
; values (%x.0 and %x.1) are stored again to %p+2 and %p+3. The expected
; output contains two i16 stores of the low half of %x, at %p and at %p+2.
define void @test_store_same_parts_twice(i32 %x, ptr %p) {
; CHECK-LABEL: define void @test_store_same_parts_twice(
; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X]] to i16
; CHECK-NEXT: store i16 [[TMP1]], ptr [[P]], align 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[P]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[X]] to i16
; CHECK-NEXT: store i16 [[TMP2]], ptr [[GEP_2]], align 1
; CHECK-NEXT: ret void
;
%x.0 = trunc i32 %x to i8
store i8 %x.0, ptr %p
%shr.1 = lshr i32 %x, 8
%x.1 = trunc i32 %shr.1 to i8
%gep.1 = getelementptr i8, ptr %p, i64 1
store i8 %x.1, ptr %gep.1
%gep.2 = getelementptr i8, ptr %p, i64 2
store i8 %x.0, ptr %gep.2
%gep.3 = getelementptr i8, ptr %p, i64 3
store i8 %x.1, ptr %gep.3
ret void
}

!0 = !{!1}
!1 = !{!1, !2}
!2 = !{!2}
Expand Down
Loading