Skip to content

Commit ee4f647

Browse files
PeddleSpam and Leon Clark authored
[AMDGPU] Propagate AA info in vector load/store splitting. (#168871)
Fixes a bug in `AMDGPUISelLowering` where alias analysis info is not propagated to split loads and stores. This is required for #161375.

Co-authored-by: Leon Clark <[email protected]>
1 parent acab67b commit ee4f647

File tree

2 files changed

+46
-11
lines changed

2 files changed

+46
-11
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1888,14 +1888,14 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
18881888
Align BaseAlign = Load->getAlign();
18891889
Align HiAlign = commonAlignment(BaseAlign, Size);
18901890

1891-
SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
1892-
Load->getChain(), BasePtr, SrcValue, LoMemVT,
1893-
BaseAlign, Load->getMemOperand()->getFlags());
1891+
SDValue LoLoad = DAG.getExtLoad(
1892+
Load->getExtensionType(), SL, LoVT, Load->getChain(), BasePtr, SrcValue,
1893+
LoMemVT, BaseAlign, Load->getMemOperand()->getFlags(), Load->getAAInfo());
18941894
SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Size));
1895-
SDValue HiLoad =
1896-
DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(),
1897-
HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()),
1898-
HiMemVT, HiAlign, Load->getMemOperand()->getFlags());
1895+
SDValue HiLoad = DAG.getExtLoad(
1896+
Load->getExtensionType(), SL, HiVT, Load->getChain(), HiPtr,
1897+
SrcValue.getWithOffset(LoMemVT.getStoreSize()), HiMemVT, HiAlign,
1898+
Load->getMemOperand()->getFlags(), Load->getAAInfo());
18991899

19001900
SDValue Join;
19011901
if (LoVT == HiVT) {
@@ -1983,10 +1983,10 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
19831983

19841984
SDValue LoStore =
19851985
DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,
1986-
Store->getMemOperand()->getFlags());
1987-
SDValue HiStore =
1988-
DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size),
1989-
HiMemVT, HiAlign, Store->getMemOperand()->getFlags());
1986+
Store->getMemOperand()->getFlags(), Store->getAAInfo());
1987+
SDValue HiStore = DAG.getTruncStore(
1988+
Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size), HiMemVT, HiAlign,
1989+
Store->getMemOperand()->getFlags(), Store->getAAInfo());
19901990

19911991
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
19921992
}
Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -stop-after=finalize-isel < %s | FileCheck %s
2+
3+
; This test verifies that instruction selection will propagate alias metadata
4+
; to split loads and stores.
5+
6+
; CHECK: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, align 32, !alias.scope ![[IN:[0-9]+]], !noalias ![[OUT:[0-9]+]], addrspace 3)
7+
; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, !alias.scope ![[IN]], !noalias ![[OUT]], addrspace 3)
8+
; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, align 32, !alias.scope ![[IN]], !noalias ![[OUT]], addrspace 3)
9+
; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = DS_READ_B128_gfx9 {{.*}} :: (load (s128) from %{{.*}}, !alias.scope ![[IN]], !noalias ![[OUT]], addrspace 3)
10+
; CHECK: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope ![[OUT]], !noalias ![[IN]], addrspace 3)
11+
; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE
12+
; CHECK-NEXT: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope ![[OUT]], !noalias ![[IN]], addrspace 3)
13+
; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE
14+
; CHECK-NEXT: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope ![[OUT]], !noalias ![[IN]], addrspace 3)
15+
; CHECK-NEXT: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE
16+
; CHECK-NEXT: DS_WRITE_B128_gfx9 {{.*}} :: (store (s128) into %{{.*}}, !alias.scope ![[OUT]], !noalias ![[IN]], addrspace 3)
17+
18+
define amdgpu_kernel void @test(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) {
19+
%idx = call i32 @llvm.amdgcn.workitem.id.x()
20+
%in.addr = getelementptr <16 x float>, ptr addrspace(3) %in, i32 %idx
21+
%val.0 = load <16 x float>, ptr addrspace(3) %in.addr, align 32, !alias.scope !4, !noalias !5
22+
%val.1 = call <16 x float> @llvm.amdgcn.wmma.f32.16x16x16.f32.v16f32.v16f32(<16 x float> %val.0, <16 x float> %val.0, <16 x float> %val.0, i1 false)
23+
%out.addr = getelementptr <16 x float>, ptr addrspace(3) %out, i32 %idx
24+
store <16 x float> %val.1, ptr addrspace(3) %out.addr, align 32, !alias.scope !5, !noalias !4
25+
ret void
26+
}
27+
28+
declare i32 @llvm.amdgcn.workitem.id.x()
29+
declare <16 x float> @llvm.amdgcn.wmma.f32.16x16x16.f32.v16f32.v16f32(<16 x float>, <16 x float>, <16 x float>, i1 immarg)
30+
31+
!0 = !{!"inout.domain"}
32+
!1 = !{!"in.scope", !0}
33+
!2 = !{!"out.scope", !0}
34+
!4 = !{!1}
35+
!5 = !{!2}

0 commit comments

Comments
 (0)