Skip to content

Commit 513c986

Browse files
committed
[SLP] Invariant loads cannot have a memory dependency on stores.
1 parent 0c0c1a7 commit 513c986

File tree

2 files changed

+100
-1
lines changed

2 files changed

+100
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21623,6 +21623,17 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
2162321623
}
2162421624
}
2162521625

21626+
// Helper to detect loads marked with !invariant.load metadata. Such loads
21627+
// are defined to read from memory that never changes for the lifetime of
21628+
// the program; any store to the same location would be UB. Therefore we
21629+
// can safely treat an invariant load and any store as non-aliasing
21630+
// for scheduling/dep purposes and skip creating a dependency edge.
21631+
auto IsInvariantLoad = [](const Instruction *I) {
21632+
if (const auto *LI = dyn_cast<LoadInst>(I))
21633+
return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
21634+
return false;
21635+
};
21636+
2162621637
// Handle the memory dependencies (if any).
2162721638
ScheduleData *NextLoadStore = BundleMember->getNextLoadStore();
2162821639
if (!NextLoadStore)
@@ -21636,10 +21647,15 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
2163621647
unsigned DistToSrc = 1;
2163721648
bool IsNonSimpleSrc = !SrcLoc.Ptr || !isSimple(SrcInst);
2163821649

21650+
if (IsInvariantLoad(SrcInst))
21651+
return; // Invariant load cannot have memory dependencies.
21652+
2163921653
for (ScheduleData *DepDest = NextLoadStore; DepDest;
2164021654
DepDest = DepDest->getNextLoadStore()) {
2164121655
assert(isInSchedulingRegion(*DepDest) && "Expected to be in region");
2164221656

21657+
Instruction *DestInst = DepDest->getInst();
21658+
2164321659
// We have two limits to reduce the complexity:
2164421660
// 1) AliasedCheckLimit: It's a small limit to reduce calls to
2164521661
// SLP->isAliased (which is the expensive part in this loop).
@@ -21648,7 +21664,8 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
2164821664
// It's important for the loop break condition (see below) to
2164921665
// check this limit even between two read-only instructions.
2165021666
if (DistToSrc >= MaxMemDepDistance ||
21651-
((SrcMayWrite || DepDest->getInst()->mayWriteToMemory()) &&
21667+
(!IsInvariantLoad(DestInst) && // Cannot have memory deps.
21668+
(SrcMayWrite || DepDest->getInst()->mayWriteToMemory()) &&
2165221669
(IsNonSimpleSrc || NumAliased >= AliasedCheckLimit ||
2165321670
SLP->isAliased(SrcLoc, SrcInst, DepDest->getInst())))) {
2165421671

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes="function(slp-vectorizer)" -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 %s -S | FileCheck %s
3+
4+
define void @test(ptr addrspace(1) %base, ptr addrspace(1) %otherA, ptr addrspace(1) %otherB) #0 {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: ptr addrspace(1) [[BASE:%.*]], ptr addrspace(1) [[OTHERA:%.*]], ptr addrspace(1) [[OTHERB:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[P0:%.*]] = getelementptr half, ptr addrspace(1) [[BASE]], i32 0
9+
; CHECK-NEXT: [[A0PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERA]], i32 0
10+
; CHECK-NEXT: [[B0PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERB]], i32 0
11+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[A0PTR]], align 2, !invariant.load [[META0:![0-9]+]]
12+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[B0PTR]], align 2, !invariant.load [[META0]]
13+
; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <2 x half> [[TMP0]], [[TMP1]]
14+
; CHECK-NEXT: store <2 x half> [[TMP2]], ptr addrspace(1) [[P0]], align 2
15+
; CHECK-NEXT: ret void
16+
;
17+
entry:
18+
%p0 = getelementptr half, ptr addrspace(1) %base, i32 0
19+
%p1 = getelementptr half, ptr addrspace(1) %base, i32 1
20+
; First pair of invariant loads from otherA.
21+
%A0PTR = getelementptr half, ptr addrspace(1) %otherA, i32 0
22+
%B0PTR = getelementptr half, ptr addrspace(1) %otherB, i32 0
23+
%A0 = load half, ptr addrspace(1) %A0PTR, align 2, !invariant.load !0
24+
%B0 = load half, ptr addrspace(1) %B0PTR, align 2, !invariant.load !0
25+
%add0 = fadd reassoc half %A0, %B0
26+
store half %add0, ptr addrspace(1) %p0, align 2
27+
%A1PTR = getelementptr half, ptr addrspace(1) %otherA, i32 1
28+
%B1PTR = getelementptr half, ptr addrspace(1) %otherB, i32 1
29+
%A1 = load half, ptr addrspace(1) %A1PTR, align 2, !invariant.load !0
30+
%B1 = load half, ptr addrspace(1) %B1PTR, align 2, !invariant.load !0
31+
%add1 = fadd reassoc half %A1, %B1
32+
store half %add1, ptr addrspace(1) %p1, align 2
33+
ret void
34+
}
35+
36+
37+
define void @aliastest(ptr addrspace(1) %base, ptr addrspace(1) %otherA, ptr addrspace(1) %otherB) #0 {
38+
; CHECK-LABEL: define void @aliastest(
39+
; CHECK-SAME: ptr addrspace(1) [[BASE:%.*]], ptr addrspace(1) [[OTHERA:%.*]], ptr addrspace(1) [[OTHERB:%.*]]) #[[ATTR0]] {
40+
; CHECK-NEXT: [[ENTRY:.*:]]
41+
; CHECK-NEXT: [[P0:%.*]] = getelementptr half, ptr addrspace(1) [[BASE]], i32 0
42+
; CHECK-NEXT: [[P1:%.*]] = getelementptr half, ptr addrspace(1) [[BASE]], i32 1
43+
; CHECK-NEXT: [[A0PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERA]], i32 0
44+
; CHECK-NEXT: [[B0PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERB]], i32 0
45+
; CHECK-NEXT: [[A0:%.*]] = load half, ptr addrspace(1) [[A0PTR]], align 2
46+
; CHECK-NEXT: [[B0:%.*]] = load half, ptr addrspace(1) [[B0PTR]], align 2
47+
; CHECK-NEXT: [[ADD0:%.*]] = fadd reassoc half [[A0]], [[B0]]
48+
; CHECK-NEXT: store half [[ADD0]], ptr addrspace(1) [[P0]], align 2
49+
; CHECK-NEXT: [[A1PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERA]], i32 1
50+
; CHECK-NEXT: [[B1PTR:%.*]] = getelementptr half, ptr addrspace(1) [[OTHERB]], i32 1
51+
; CHECK-NEXT: [[A1:%.*]] = load half, ptr addrspace(1) [[A1PTR]], align 2
52+
; CHECK-NEXT: [[B1:%.*]] = load half, ptr addrspace(1) [[B1PTR]], align 2
53+
; CHECK-NEXT: [[ADD1:%.*]] = fadd reassoc half [[A1]], [[B1]]
54+
; CHECK-NEXT: store half [[ADD1]], ptr addrspace(1) [[P1]], align 2
55+
; CHECK-NEXT: ret void
56+
;
57+
entry:
58+
%p0 = getelementptr half, ptr addrspace(1) %base, i32 0
59+
%p1 = getelementptr half, ptr addrspace(1) %base, i32 1
60+
; First pair of invariant loads from otherA.
61+
%A0PTR = getelementptr half, ptr addrspace(1) %otherA, i32 0
62+
%B0PTR = getelementptr half, ptr addrspace(1) %otherB, i32 0
63+
%A0 = load half, ptr addrspace(1) %A0PTR, align 2
64+
%B0 = load half, ptr addrspace(1) %B0PTR, align 2
65+
%add0 = fadd reassoc half %A0, %B0
66+
store half %add0, ptr addrspace(1) %p0, align 2
67+
%A1PTR = getelementptr half, ptr addrspace(1) %otherA, i32 1
68+
%B1PTR = getelementptr half, ptr addrspace(1) %otherB, i32 1
69+
%A1 = load half, ptr addrspace(1) %A1PTR, align 2
70+
%B1 = load half, ptr addrspace(1) %B1PTR, align 2
71+
%add1 = fadd reassoc half %A1, %B1
72+
store half %add1, ptr addrspace(1) %p1, align 2
73+
ret void
74+
}
75+
76+
77+
attributes #0 = { nounwind }
78+
79+
!0 = !{}
80+
;.
81+
; CHECK: [[META0]] = !{}
82+
;.

0 commit comments

Comments
 (0)