Skip to content

Commit d5edd7a

Browse files
committed
AMDGPU: Fix handling of negative scratch offset
1 parent 785b16a, commit d5edd7a

File tree

2 files changed

+37
-1
lines changed

2 files changed

+37
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1997,7 +1997,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
19971997
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
19981998
return false;
19991999
SAddr = SelectSAddrFI(CurDAG, SAddr);
2000-
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2000+
Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
20012001
return true;
20022002
}
20032003

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -stop-after=amdgpu-isel -verify-machineinstrs | FileCheck %s
2+
3+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
4+
target triple = "amdgcn-amd-amdhsa"
5+
6+
%union.anon.41 = type { [4 x i64] }
7+
%union.anon.2 = type { i8 }
8+
9+
define fastcc void @_ZN10PrimitivesI12rccl_bfloat810FuncMinMaxIS0_E13FanAsymmetricILi1ELi1EELi1E10ProtoLL128Li0EE9localCopyEPS0_S7_i(i32 %0, i64 %idx.ext62.i.i) {
10+
entry:
11+
%1 = alloca %union.anon.41, i32 0, align 8, addrspace(5)
12+
%add.ptr63.i.i3 = getelementptr %union.anon.2, ptr null, i64 %idx.ext62.i.i
13+
br label %for.body69.i.i.epil3
14+
15+
for.body69.i.i.epil3: ; preds = %for.body69.i.i.epil3, %entry
16+
%i.0117.i.i.epil4 = phi i32 [ %inc.i.i.7.epil, %for.body69.i.i.epil3 ], [ %0, %entry ]
17+
%conv65.i.i.epil5 = zext i32 %i.0117.i.i.epil4 to i64
18+
%arrayidx73.i.i.epil6 = getelementptr [32 x i8], ptr addrspace(5) %1, i32 0, i32 %i.0117.i.i.epil4
19+
%add.ptr75.i.i.epil7 = getelementptr i8, ptr %add.ptr63.i.i3, i64 %conv65.i.i.epil5
20+
%2 = load <4 x i8>, ptr addrspace(5) %arrayidx73.i.i.epil6, align 8
21+
store <4 x i8> %2, ptr %add.ptr75.i.i.epil7, align 1
22+
%inc.i.i.3.epil = or disjoint i32 %i.0117.i.i.epil4, 1
23+
%conv65.i.i.4.epil = zext i32 %inc.i.i.3.epil to i64
24+
%arrayidx73.i.i.4.epil = getelementptr [32 x i8], ptr addrspace(5) %1, i32 0, i32 %inc.i.i.3.epil
25+
%add.ptr75.i.i.4.epil = getelementptr i8, ptr %add.ptr63.i.i3, i64 %conv65.i.i.4.epil
26+
%3 = load <4 x i8>, ptr addrspace(5) %arrayidx73.i.i.4.epil, align 4
27+
store <4 x i8> %3, ptr %add.ptr75.i.i.4.epil, align 1
28+
%inc.i.i.7.epil = add nuw i32 %i.0117.i.i.epil4, 1
29+
br label %for.body69.i.i.epil3
30+
31+
for.body69.i.i.epil3.for.cond.cleanup68.loopexit.i.i.unr-lcssa_crit_edge: ; No predecessors!
32+
%conv65.i.i.epil = zext i32 %inc.i.i.7.epil to i64
33+
ret void
34+
}
35+
36+
; CHECK: SCRATCH_LOAD_DWORD_SVS %{{[0-9]+}}, %{{[0-9]+}}, -1

0 commit comments

Comments
 (0)