Commit 4d014b8

Update location; exclude invariant (and simple) loads from memory inst scheduler data list.
Add volatile test.
1 parent 513c986 commit 4d014b8


2 files changed: +51 -18 lines changed


llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 12 additions & 18 deletions
@@ -21412,7 +21412,18 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
          "new ScheduleData already in scheduling region");
   SD->init(SchedulingRegionID, I);
 
+  auto CanIgnoreLoad = [](const Instruction *I) {
+    const LoadInst *LI = dyn_cast<LoadInst>(I);
+    // If there is a simple load marked as invariant, we can ignore it.
+    // But, in the (unlikely) case of non-simple invariant load,
+    // we should not ignore it.
+    return LI && LI->isSimple() &&
+           LI->getMetadata(LLVMContext::MD_invariant_load);
+  };
+
   if (I->mayReadOrWriteMemory() &&
+      // Simple InvariantLoad does not depend on other memory accesses.
+      !CanIgnoreLoad(I) &&
       (!isa<IntrinsicInst>(I) ||
        (cast<IntrinsicInst>(I)->getIntrinsicID() != Intrinsic::sideeffect &&
         cast<IntrinsicInst>(I)->getIntrinsicID() !=
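
Worth noting what the commit message's "(and simple)" buys here: LoadInst::isSimple() is true only for loads that are neither volatile nor atomic, so the predicate above skips exactly the loads that are free of ordering constraints. A standalone IR sketch of the distinction (illustrative only, not part of this commit; the function and value names are made up):

; Two invariant loads; only the first satisfies the CanIgnoreLoad predicate.
define i32 @can_ignore_sketch(ptr %p) {
  ; Simple (non-volatile, non-atomic) and marked !invariant.load: the
  ; scheduler may leave this load off its memory-instruction list.
  %a = load i32, ptr %p, !invariant.load !0
  ; Volatile, hence not simple: it stays on the list despite the metadata.
  %b = load volatile i32, ptr %p, !invariant.load !0
  %r = add i32 %a, %b
  ret i32 %r
}

!0 = !{}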
@@ -21623,17 +21634,6 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
     }
   }
 
-  // Helper to detect loads marked with !invariant.load metadata. Such loads
-  // are defined to read from memory that never changes for the lifetime of
-  // the program; any store to the same location would be UB. Therefore we
-  // can conservatively treat an invariant load and any store as non-aliasing
-  // for scheduling/dep purposes and skip creating a dependency edge.
-  auto IsInvariantLoad = [](const Instruction *I) {
-    if (const auto *LI = dyn_cast<LoadInst>(I))
-      return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
-    return false;
-  };
-
   // Handle the memory dependencies (if any).
   ScheduleData *NextLoadStore = BundleMember->getNextLoadStore();
   if (!NextLoadStore)
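
The removed comment's reasoning carries over to the new location in initScheduleData: a store to a location read by an !invariant.load would be immediate UB, so the load and any store may be treated as non-aliasing and need no dependency edge between them. A minimal IR sketch of that pattern (assumed names, not taken from the commit):

; If %out aliased %inv, the store would be UB given the invariant load,
; so no memory dependency between the two instructions is required.
define i32 @no_dep_sketch(ptr %inv, ptr %out) {
  %v = load i32, ptr %inv, !invariant.load !0
  store i32 42, ptr %out
  ret i32 %v
}

!0 = !{}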
@@ -21647,15 +21647,10 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
     unsigned DistToSrc = 1;
     bool IsNonSimpleSrc = !SrcLoc.Ptr || !isSimple(SrcInst);
 
-    if (IsInvariantLoad(SrcInst))
-      return; // Invariant load cannot have memory dependencies.
-
     for (ScheduleData *DepDest = NextLoadStore; DepDest;
          DepDest = DepDest->getNextLoadStore()) {
       assert(isInSchedulingRegion(*DepDest) && "Expected to be in region");
 
-      Instruction *DestInst = DepDest->getInst();
-
       // We have two limits to reduce the complexity:
       // 1) AliasedCheckLimit: It's a small limit to reduce calls to
       //    SLP->isAliased (which is the expensive part in this loop).
@@ -21664,8 +21659,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
       //    It's important for the loop break condition (see below) to
       //    check this limit even between two read-only instructions.
       if (DistToSrc >= MaxMemDepDistance ||
-          (!IsInvariantLoad(DestInst) && // Cannot have memory deps.
-           (SrcMayWrite || DepDest->getInst()->mayWriteToMemory()) &&
+          ((SrcMayWrite || DepDest->getInst()->mayWriteToMemory()) &&
           (IsNonSimpleSrc || NumAliased >= AliasedCheckLimit ||
            SLP->isAliased(SrcLoc, SrcInst, DepDest->getInst())))) {
 

llvm/test/Transforms/SLPVectorizer/AMDGPU/invariant-load-no-alias-store.ll

Lines changed: 39 additions & 0 deletions
@@ -73,6 +73,45 @@ entry:
 ret void
 }
 
+define void @voltest(ptr addrspace(1) %base, ptr addrspace(1) %otherA, ptr addrspace(1) %otherB) #0 {
+; CHECK-LABEL: define void @voltest(
+; CHECK-SAME: ptr addrspace(1) [[BASE:%.*]], ptr addrspace(1) [[OTHERA:%.*]], ptr addrspace(1) [[OTHERB:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P0:%.*]] = getelementptr half, ptr addrspace(1) [[BASE]], i32 0
+; CHECK-NEXT: [[P1:%.*]] = getelementptr half, ptr addrspace(1) [[BASE]], i32 1
+; CHECK-NEXT: [[A0PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERA]], i32 0
+; CHECK-NEXT: [[B0PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERB]], i32 0
+; CHECK-NEXT: [[A0:%.*]] = load volatile half, ptr addrspace(1) [[A0PTR]], align 2, !invariant.load [[META0]]
+; CHECK-NEXT: [[B0:%.*]] = load volatile half, ptr addrspace(1) [[B0PTR]], align 2, !invariant.load [[META0]]
+; CHECK-NEXT: [[ADD0:%.*]] = fadd reassoc half [[A0]], [[B0]]
+; CHECK-NEXT: store half [[ADD0]], ptr addrspace(1) [[P0]], align 2
+; CHECK-NEXT: [[A1PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERA]], i32 1
+; CHECK-NEXT: [[B1PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERB]], i32 1
+; CHECK-NEXT: [[A1:%.*]] = load volatile half, ptr addrspace(1) [[A1PTR]], align 2, !invariant.load [[META0]]
+; CHECK-NEXT: [[B1:%.*]] = load volatile half, ptr addrspace(1) [[B1PTR]], align 2, !invariant.load [[META0]]
+; CHECK-NEXT: [[ADD1:%.*]] = fadd reassoc half [[A1]], [[B1]]
+; CHECK-NEXT: store half [[ADD1]], ptr addrspace(1) [[P1]], align 2
+; CHECK-NEXT: ret void
+;
+entry:
+  %p0 = getelementptr half, ptr addrspace(1) %base, i32 0
+  %p1 = getelementptr half, ptr addrspace(1) %base, i32 1
+  ; First pair of invariant loads from otherA.
+  %A0PTR = getelementptr half, ptr addrspace(1) %otherA, i32 0
+  %B0PTR = getelementptr half, ptr addrspace(1) %otherB, i32 0
+  %A0 = load volatile half, ptr addrspace(1) %A0PTR, align 2, !invariant.load !0
+  %B0 = load volatile half, ptr addrspace(1) %B0PTR, align 2, !invariant.load !0
+  %add0 = fadd reassoc half %A0, %B0
+  store half %add0, ptr addrspace(1) %p0, align 2
+  %A1PTR = getelementptr half, ptr addrspace(1) %otherA, i32 1
+  %B1PTR = getelementptr half, ptr addrspace(1) %otherB, i32 1
+  %A1 = load volatile half, ptr addrspace(1) %A1PTR, align 2, !invariant.load !0
+  %B1 = load volatile half, ptr addrspace(1) %B1PTR, align 2, !invariant.load !0
+  %add1 = fadd reassoc half %A1, %B1
+  store half %add1, ptr addrspace(1) %p1, align 2
+  ret void
+}
+
 
 attributes #0 = { nounwind }
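
The CHECK lines above expect everything to stay scalar: volatile makes these loads non-simple, so they keep their memory dependencies and SLP does not form <2 x half> operations. For contrast, a non-volatile variant of the same pattern, which the new CanIgnoreLoad path is meant to unlock (a hedged sketch of plausible input, not a line from this test file; the file's earlier, non-volatile test presumably covers this case):

; Hypothetical non-volatile counterpart: the simple invariant loads no
; longer pin the stores to %base, so SLP may consider bundling the two
; fadd/store pairs into <2 x half> operations.
define void @nonvoltest(ptr addrspace(1) %base, ptr addrspace(1) %otherA, ptr addrspace(1) %otherB) {
entry:
  %p0 = getelementptr half, ptr addrspace(1) %base, i32 0
  %p1 = getelementptr half, ptr addrspace(1) %base, i32 1
  %A0 = load half, ptr addrspace(1) %otherA, align 2, !invariant.load !0
  %B0 = load half, ptr addrspace(1) %otherB, align 2, !invariant.load !0
  %add0 = fadd reassoc half %A0, %B0
  store half %add0, ptr addrspace(1) %p0, align 2
  %A1PTR = getelementptr half, ptr addrspace(1) %otherA, i32 1
  %B1PTR = getelementptr half, ptr addrspace(1) %otherB, i32 1
  %A1 = load half, ptr addrspace(1) %A1PTR, align 2, !invariant.load !0
  %B1 = load half, ptr addrspace(1) %B1PTR, align 2, !invariant.load !0
  %add1 = fadd reassoc half %A1, %B1
  store half %add1, ptr addrspace(1) %p1, align 2
  ret void
}

!0 = !{}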
