Skip to content

Commit 7905830

Browse files
committed
[AMDGPU] Update for reviewer.
1 parent 485df4e commit 7905830

File tree

6 files changed: 25 additions and 44 deletions.

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2009,7 +2009,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
20092009
unsigned IntrOpcode = Intr->BaseOpcode;
20102010

20112011
// For image atomic: use no-return opcode if result is unused.
2012-
if (Intr->NoRetBaseOpcode != 0 && Intr->NoRetBaseOpcode != Intr->BaseOpcode) {
2012+
if (Intr->NoRetBaseOpcode != Intr->BaseOpcode) {
20132013
Register ResultDef = MI.getOperand(0).getReg();
20142014
if (MRI->use_nodbg_empty(ResultDef))
20152015
IntrOpcode = Intr->NoRetBaseOpcode;

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -994,11 +994,11 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
994994
string renamed = ""> {
995995
let hasSideEffects = 1, // FIXME: remove this
996996
mayLoad = 1, mayStore = 1, hasPostISelHook = 0, DisableWQM = 1,
997-
FPAtomic = isFP in {
997+
FPAtomic = isFP, IsAtomicNoRet = noRtn in {
998998
let VAddrDwords = 1 in {
999999
let ssamp = 0 in {
10001000
if op.HAS_SI then {
1001-
def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, noRtn,enableDasm>;
1001+
def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
10021002
}
10031003
if op.HAS_VI then {
10041004
def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
@@ -1094,11 +1094,11 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
10941094

10951095
multiclass MIMG_Atomic_Impl <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
10961096
bit noRtn = 0, string renamed = ""> { // 64-bit atomics
1097-
let IsAtomicRet = !if(noRtn, 0, 1) in {
1097+
let IsAtomicRet = !not(noRtn) in {
10981098
def "" : MIMGBaseOpcode {
10991099
let Atomic = 1;
11001100
let AtomicX2 = isCmpSwap;
1101-
let NoReturn = !if(noRtn, 1, 0);
1101+
let NoReturn = noRtn;
11021102
}
11031103

11041104
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9150,7 +9150,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
91509150
MachineFunction &MF = DAG.getMachineFunction();
91519151
const GCNSubtarget *ST = &MF.getSubtarget<GCNSubtarget>();
91529152
unsigned IntrOpcode = Intr->BaseOpcode;
9153-
if (Intr->NoRetBaseOpcode != 0 && !Op.getNode()->hasAnyUseOfValue(0))
9153+
if (Intr->NoRetBaseOpcode != Intr->BaseOpcode &&
9154+
!Op.getNode()->hasAnyUseOfValue(0))
91549155
IntrOpcode = Intr->NoRetBaseOpcode;
91559156
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
91569157
AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
@@ -9159,12 +9160,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
91599160
bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
91609161
bool IsGFX12Plus = AMDGPU::isGFX12Plus(*Subtarget);
91619162

9162-
SmallVector<EVT, 3> ResultTypes(Op->values());
91639163
SmallVector<EVT, 3> OrigResultTypes(Op->values());
9164-
if (BaseOpcode->NoReturn && BaseOpcode->Atomic) {
9165-
ResultTypes.clear();
9164+
SmallVector<EVT, 3> ResultTypes;
9165+
if (BaseOpcode->NoReturn && BaseOpcode->Atomic)
91669166
ResultTypes.push_back(MVT::Other);
9167-
}
9167+
else
9168+
ResultTypes = OrigResultTypes;
91689169

91699170
bool IsD16 = false;
91709171
bool IsG16 = false;
@@ -9184,10 +9185,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
91849185
VData = Op.getOperand(2);
91859186

91869187
IsAtomicPacked16Bit =
9187-
(Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
9188-
Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16_NORTN ||
9189-
Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16 ||
9190-
Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16_NORTN);
9188+
(IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
9189+
IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16_NORTN ||
9190+
IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16 ||
9191+
IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16_NORTN);
91919192

91929193
bool Is64Bit = VData.getValueSizeInBits() == 64;
91939194
if (BaseOpcode->AtomicX2) {

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,7 @@ define amdgpu_ps void @atomic_swap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s)
9292
; GFX90A-NEXT: ;;#ASMSTART
9393
; GFX90A-NEXT: ; def a0
9494
; GFX90A-NEXT: ;;#ASMEND
95-
; GFX90A-NEXT: v_accvgpr_read_b32 v1, a0
96-
; GFX90A-NEXT: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc
95+
; GFX90A-NEXT: image_atomic_swap a0, v0, s[0:7] dmask:0x1 unorm
9796
; GFX90A-NEXT: s_endpgm
9897
%data = call i32 asm "; def $0", "=a"()
9998
%unused = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -106,8 +105,7 @@ define amdgpu_ps void @atomic_add_2d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s, i
106105
; GFX90A-NEXT: ;;#ASMSTART
107106
; GFX90A-NEXT: ; def a0
108107
; GFX90A-NEXT: ;;#ASMEND
109-
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
110-
; GFX90A-NEXT: image_atomic_add v2, v[0:1], s[0:7] dmask:0x1 unorm glc
108+
; GFX90A-NEXT: image_atomic_add a0, v[0:1], s[0:7] dmask:0x1 unorm
111109
; GFX90A-NEXT: s_endpgm
112110
%data = call i32 asm "; def $0", "=a"()
113111
%unused = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
@@ -123,9 +121,7 @@ define amdgpu_ps void @atomic_cmpswap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %
123121
; GFX90A-NEXT: ;;#ASMSTART
124122
; GFX90A-NEXT: ; def a1
125123
; GFX90A-NEXT: ;;#ASMEND
126-
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
127-
; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
128-
; GFX90A-NEXT: image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
124+
; GFX90A-NEXT: image_atomic_cmpswap a[0:1], v0, s[0:7] dmask:0x3 unorm
129125
; GFX90A-NEXT: s_endpgm
130126
%cmp = call i32 asm "; def $0", "=a"()
131127
%swap = call i32 asm "; def $0", "=a"()
@@ -139,9 +135,7 @@ define amdgpu_ps void @atomic_swap_1d_i64_agpr_noret(<8 x i32> inreg %rsrc, i32
139135
; GFX90A-NEXT: ;;#ASMSTART
140136
; GFX90A-NEXT: ; def a[0:1]
141137
; GFX90A-NEXT: ;;#ASMEND
142-
; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
143-
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
144-
; GFX90A-NEXT: image_atomic_swap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
138+
; GFX90A-NEXT: image_atomic_swap a[0:1], v0, s[0:7] dmask:0x3 unorm
145139
; GFX90A-NEXT: s_endpgm
146140
%data = call i64 asm "; def $0", "=a"()
147141
%unused = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -154,14 +148,10 @@ define amdgpu_ps void @atomic_cmpswap_1d_64_agpr_noret(<8 x i32> inreg %rsrc, i3
154148
; GFX90A-NEXT: ;;#ASMSTART
155149
; GFX90A-NEXT: ; def a[0:1]
156150
; GFX90A-NEXT: ;;#ASMEND
157-
; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
158-
; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
159151
; GFX90A-NEXT: ;;#ASMSTART
160-
; GFX90A-NEXT: ; def a[0:1]
152+
; GFX90A-NEXT: ; def a[2:3]
161153
; GFX90A-NEXT: ;;#ASMEND
162-
; GFX90A-NEXT: v_accvgpr_read_b32 v5, a1
163-
; GFX90A-NEXT: v_accvgpr_read_b32 v4, a0
164-
; GFX90A-NEXT: image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc
154+
; GFX90A-NEXT: image_atomic_cmpswap a[0:3], v0, s[0:7] dmask:0xf unorm
165155
; GFX90A-NEXT: s_endpgm
166156
%cmp = call i64 asm "; def $0", "=a"()
167157
%swap = call i64 asm "; def $0", "=a"()

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -569,12 +569,12 @@ define amdgpu_ps void @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %
569569
;
570570
; GFX12-GISE-LABEL: atomic_add_1d_slc:
571571
; GFX12-GISE: ; %bb.0:
572-
; GFX12-GISE-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
572+
; GFX12-GISE-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
573573
; GFX12-GISE-NEXT: s_endpgm
574574
;
575575
; GFX12-LABEL: atomic_add_1d_slc:
576576
; GFX12: ; %bb.0:
577-
; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
577+
; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
578578
; GFX12-NEXT: s_endpgm
579579
%v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
580580
ret void

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,12 @@ define amdgpu_ps float @atomic_pk_add_f16_1d_v2_noret(<8 x i32> inreg %rsrc, <2
4343
; GFX12-SDAG: ; %bb.0: ; %main_body
4444
; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
4545
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
46-
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
4746
; GFX12-SDAG-NEXT: ; return to shader part epilog
4847
;
4948
; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v2_noret:
5049
; GFX12-GISEL: ; %bb.0: ; %main_body
5150
; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
5251
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
53-
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
5452
; GFX12-GISEL-NEXT: ; return to shader part epilog
5553
main_body:
5654
%unused = call <2 x half> @llvm.amdgcn.image.atomic.pk.add.f16.1d.v2f16.v2f16(<2 x half> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -81,14 +79,12 @@ define amdgpu_ps float @atomic_pk_add_f16_1d_v4_noret(<8 x i32> inreg %rsrc, <4
8179
; GFX12-SDAG: ; %bb.0: ; %main_body
8280
; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
8381
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
84-
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
8582
; GFX12-SDAG-NEXT: ; return to shader part epilog
8683
;
8784
; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v4_noret:
8885
; GFX12-GISEL: ; %bb.0: ; %main_body
8986
; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
9087
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
91-
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
9288
; GFX12-GISEL-NEXT: ; return to shader part epilog
9389
main_body:
9490
%unused = call <4 x half> @llvm.amdgcn.image.atomic.pk.add.f16.1d.v4f16.v4f16(<4 x half> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -128,14 +124,12 @@ define amdgpu_ps float @atomic_pk_add_bf16_1d_v2_noret(<8 x i32> inreg %rsrc, <2
128124
; GFX12-SDAG: ; %bb.0: ; %main_body
129125
; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
130126
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
131-
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
132127
; GFX12-SDAG-NEXT: ; return to shader part epilog
133128
;
134129
; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v2_noret:
135130
; GFX12-GISEL: ; %bb.0: ; %main_body
136131
; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
137132
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
138-
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
139133
; GFX12-GISEL-NEXT: ; return to shader part epilog
140134
main_body:
141135
%unused = call <2 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v2bf16.v2bf16(<2 x bfloat> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -175,14 +169,12 @@ define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_noret(<8 x i32> inreg %rsrc, <4
175169
; GFX12-SDAG: ; %bb.0: ; %main_body
176170
; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
177171
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
178-
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
179172
; GFX12-SDAG-NEXT: ; return to shader part epilog
180173
;
181174
; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_noret:
182175
; GFX12-GISEL: ; %bb.0: ; %main_body
183176
; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
184177
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
185-
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
186178
; GFX12-GISEL-NEXT: ; return to shader part epilog
187179
main_body:
188180
%unused = call <4 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v4bf16.v4bf16(<4 x bfloat> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -192,16 +184,14 @@ main_body:
192184
define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_nt(<8 x i32> inreg %rsrc, <4 x bfloat> %data, i32 %s) {
193185
; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v4_nt:
194186
; GFX12-SDAG: ; %bb.0: ; %main_body
195-
; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
187+
; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
196188
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
197-
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
198189
; GFX12-SDAG-NEXT: ; return to shader part epilog
199190
;
200191
; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_nt:
201192
; GFX12-GISEL: ; %bb.0: ; %main_body
202-
; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
193+
; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
203194
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
204-
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
205195
; GFX12-GISEL-NEXT: ; return to shader part epilog
206196
main_body:
207197
%unused = call <4 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v4bf16.v4bf16(<4 x bfloat> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)

0 commit comments

Comments
 (0)