Skip to content

Commit 518d4ec

Browse files
committed
fix test
1 parent b2c7a76 commit 518d4ec

File tree

5 files changed

+1301
-701
lines changed

5 files changed

+1301
-701
lines changed

llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp

Lines changed: 0 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -187,47 +187,6 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
187187
OutMI.addOperand(Dest);
188188
OutMI.addOperand(Src);
189189
return;
190-
} else if (const auto *Info = AMDGPU::getT16D16Helper(Opcode)) {
191-
uint16_t OpName = AMDGPU::OpName::OPERAND_LAST;
192-
if (TII->isDS(Opcode)) {
193-
if (MI->mayLoad())
194-
OpName = llvm::AMDGPU::OpName::vdst;
195-
else if (MI->mayStore())
196-
OpName = llvm::AMDGPU::OpName::data0;
197-
else
198-
llvm_unreachable("LDS load or store expected");
199-
} else {
200-
OpName = AMDGPU::hasNamedOperand(Opcode, llvm::AMDGPU::OpName::vdata)
201-
? llvm::AMDGPU::OpName::vdata
202-
: llvm::AMDGPU::OpName::vdst;
203-
}
204-
int VDstOrVDataIdx = AMDGPU::getNamedOperandIdx(Opcode, OpName);
205-
MachineOperand MIVDstOrVData = MI->getOperand(VDstOrVDataIdx);
206-
bool IsHi = AMDGPU::isHi16Reg(MIVDstOrVData.getReg(), TRI);
207-
Opcode = IsHi ? Info->HiOp : Info->LoOp;
208-
MIVDstOrVData.clearParent(); // Avoid use list error in setReg call
209-
MIVDstOrVData.setReg(TRI.get32BitRegister(MIVDstOrVData.getReg()));
210-
211-
int MCOpcode = TII->pseudoToMCOpcode(Opcode);
212-
assert(MCOpcode != -1 &&
213-
"Pseudo instruction doesn't have a target-specific version");
214-
OutMI.setOpcode(MCOpcode);
215-
for (int I = 0, E = MI->getNumExplicitOperands(); I < E; I++) {
216-
const MachineOperand &MO = MI->getOperand(I);
217-
MCOperand MCOp;
218-
if (I == VDstOrVDataIdx)
219-
lowerOperand(MIVDstOrVData, MCOp);
220-
else
221-
lowerOperand(MO, MCOp);
222-
OutMI.addOperand(MCOp);
223-
}
224-
225-
if (AMDGPU::hasNamedOperand(MCOpcode, AMDGPU::OpName::vdst_in)) {
226-
MCOperand MCOp;
227-
lowerOperand(MIVDstOrVData, MCOp);
228-
OutMI.addOperand(MCOp);
229-
}
230-
return;
231190
} else if (Opcode == AMDGPU::SI_TCRETURN ||
232191
Opcode == AMDGPU::SI_TCRETURN_GFX) {
233192
// TODO: How to use branch immediate and avoid register+add?

llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll

Lines changed: 28 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -239,53 +239,48 @@ define amdgpu_kernel void @store_lds_v4i32_align1(ptr addrspace(3) %out, <4 x i3
239239
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
240240
; GFX11-NEXT: s_and_b32 s6, 0xffff, s0
241241
; GFX11-NEXT: s_lshr_b32 s5, s0, 16
242-
; GFX11-NEXT: v_mov_b16_e32 v0.l, s0
243-
; GFX11-NEXT: v_mov_b32_e32 v5, s4
242+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s4
244243
; GFX11-NEXT: s_lshr_b32 s0, s1, 16
245244
; GFX11-NEXT: s_and_b32 s4, 0xffff, s1
246-
; GFX11-NEXT: v_mov_b16_e32 v0.h, s1
245+
; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v3, s2
247246
; GFX11-NEXT: s_lshr_b32 s1, s2, 16
248247
; GFX11-NEXT: s_and_b32 s7, 0xffff, s2
249-
; GFX11-NEXT: v_mov_b16_e32 v1.l, s2
250248
; GFX11-NEXT: s_lshr_b32 s2, s6, 8
251249
; GFX11-NEXT: s_lshr_b32 s6, s5, 8
252-
; GFX11-NEXT: v_mov_b16_e32 v2.h, s2
253-
; GFX11-NEXT: v_mov_b16_e32 v1.h, s5
250+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
251+
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s6
252+
; GFX11-NEXT: v_dual_mov_b32 v4, s5 :: v_dual_mov_b32 v5, s0
254253
; GFX11-NEXT: s_lshr_b32 s4, s4, 8
255254
; GFX11-NEXT: s_lshr_b32 s5, s0, 8
256-
; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
257255
; GFX11-NEXT: s_lshr_b32 s0, s7, 8
258-
; GFX11-NEXT: v_mov_b16_e32 v3.l, s6
259-
; GFX11-NEXT: v_mov_b16_e32 v3.h, s4
260-
; GFX11-NEXT: v_mov_b16_e32 v4.l, s5
261-
; GFX11-NEXT: ds_store_b8 v5, v0
262-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v2 offset:1
263-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v1 offset:2
264-
; GFX11-NEXT: ds_store_b8 v5, v3 offset:3
265-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v0 offset:4
266-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v3 offset:5
267-
; GFX11-NEXT: ds_store_b8 v5, v2 offset:6
268-
; GFX11-NEXT: ds_store_b8 v5, v4 offset:7
269-
; GFX11-NEXT: v_mov_b16_e32 v0.l, s0
256+
; GFX11-NEXT: v_dual_mov_b32 v8, s4 :: v_dual_mov_b32 v9, s5
257+
; GFX11-NEXT: ds_store_b8 v1, v0
258+
; GFX11-NEXT: ds_store_b8 v1, v6 offset:1
259+
; GFX11-NEXT: ds_store_b8 v1, v4 offset:2
260+
; GFX11-NEXT: ds_store_b8 v1, v7 offset:3
261+
; GFX11-NEXT: ds_store_b8 v1, v2 offset:4
262+
; GFX11-NEXT: ds_store_b8 v1, v8 offset:5
263+
; GFX11-NEXT: ds_store_b8 v1, v5 offset:6
264+
; GFX11-NEXT: ds_store_b8 v1, v9 offset:7
265+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v5, s3
270266
; GFX11-NEXT: s_lshr_b32 s0, s1, 8
271-
; GFX11-NEXT: v_mov_b16_e32 v0.h, s1
272-
; GFX11-NEXT: v_mov_b16_e32 v1.h, s0
267+
; GFX11-NEXT: v_mov_b32_e32 v2, s1
268+
; GFX11-NEXT: v_mov_b32_e32 v4, s0
273269
; GFX11-NEXT: s_and_b32 s0, 0xffff, s3
274270
; GFX11-NEXT: s_lshr_b32 s1, s3, 16
275271
; GFX11-NEXT: s_lshr_b32 s0, s0, 8
276-
; GFX11-NEXT: v_mov_b16_e32 v2.l, s3
277-
; GFX11-NEXT: v_mov_b16_e32 v2.h, s0
272+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
273+
; GFX11-NEXT: v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
278274
; GFX11-NEXT: s_lshr_b32 s0, s1, 8
279-
; GFX11-NEXT: v_mov_b16_e32 v3.l, s1
280-
; GFX11-NEXT: v_mov_b16_e32 v3.h, s0
281-
; GFX11-NEXT: ds_store_b8 v5, v1 offset:8
282-
; GFX11-NEXT: ds_store_b8 v5, v0 offset:9
283-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v0 offset:10
284-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v1 offset:11
285-
; GFX11-NEXT: ds_store_b8 v5, v2 offset:12
286-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v2 offset:13
287-
; GFX11-NEXT: ds_store_b8 v5, v3 offset:14
288-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v3 offset:15
275+
; GFX11-NEXT: v_mov_b32_e32 v8, s0
276+
; GFX11-NEXT: ds_store_b8 v1, v3 offset:8
277+
; GFX11-NEXT: ds_store_b8 v1, v0 offset:9
278+
; GFX11-NEXT: ds_store_b8 v1, v2 offset:10
279+
; GFX11-NEXT: ds_store_b8 v1, v4 offset:11
280+
; GFX11-NEXT: ds_store_b8 v1, v5 offset:12
281+
; GFX11-NEXT: ds_store_b8 v1, v6 offset:13
282+
; GFX11-NEXT: ds_store_b8 v1, v7 offset:14
283+
; GFX11-NEXT: ds_store_b8 v1, v8 offset:15
289284
; GFX11-NEXT: s_endpgm
290285
store <4 x i32> %x, ptr addrspace(3) %out, align 1
291286
ret void

llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll

Lines changed: 19 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -207,42 +207,36 @@ define amdgpu_kernel void @store_lds_v3i32_align1(ptr addrspace(3) %out, <3 x i3
207207
; GFX11-NEXT: s_load_b32 s3, s[4:5], 0x0
208208
; GFX11-NEXT: s_and_b32 s5, 0xffff, s0
209209
; GFX11-NEXT: s_lshr_b32 s4, s0, 16
210-
; GFX11-NEXT: v_mov_b16_e32 v0.l, s0
211210
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
212-
; GFX11-NEXT: v_mov_b32_e32 v6, s3
211+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s3
213212
; GFX11-NEXT: s_lshr_b32 s0, s1, 16
214213
; GFX11-NEXT: s_and_b32 s3, 0xffff, s1
215-
; GFX11-NEXT: v_mov_b16_e32 v0.h, s1
214+
; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v3, s2
216215
; GFX11-NEXT: s_lshr_b32 s1, s2, 16
217216
; GFX11-NEXT: s_and_b32 s6, 0xffff, s2
218-
; GFX11-NEXT: v_mov_b16_e32 v1.l, s2
219217
; GFX11-NEXT: s_lshr_b32 s2, s5, 8
220218
; GFX11-NEXT: s_lshr_b32 s5, s4, 8
221-
; GFX11-NEXT: v_mov_b16_e32 v3.l, s2
222-
; GFX11-NEXT: v_mov_b16_e32 v1.h, s4
219+
; GFX11-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v7, s2
220+
; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s0
223221
; GFX11-NEXT: s_lshr_b32 s3, s3, 8
224222
; GFX11-NEXT: s_lshr_b32 s4, s0, 8
225-
; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
226223
; GFX11-NEXT: s_lshr_b32 s0, s6, 8
227224
; GFX11-NEXT: s_lshr_b32 s6, s1, 8
228-
; GFX11-NEXT: v_mov_b16_e32 v3.h, s5
229-
; GFX11-NEXT: v_mov_b16_e32 v2.h, s1
230-
; GFX11-NEXT: v_mov_b16_e32 v4.l, s3
231-
; GFX11-NEXT: v_mov_b16_e32 v4.h, s4
232-
; GFX11-NEXT: v_mov_b16_e32 v5.l, s0
233-
; GFX11-NEXT: v_mov_b16_e32 v5.h, s6
234-
; GFX11-NEXT: ds_store_b8 v6, v0
235-
; GFX11-NEXT: ds_store_b8 v6, v3 offset:1
236-
; GFX11-NEXT: ds_store_b8_d16_hi v6, v1 offset:2
237-
; GFX11-NEXT: ds_store_b8_d16_hi v6, v3 offset:3
238-
; GFX11-NEXT: ds_store_b8_d16_hi v6, v0 offset:4
239-
; GFX11-NEXT: ds_store_b8 v6, v4 offset:5
240-
; GFX11-NEXT: ds_store_b8 v6, v2 offset:6
241-
; GFX11-NEXT: ds_store_b8_d16_hi v6, v4 offset:7
242-
; GFX11-NEXT: ds_store_b8 v6, v1 offset:8
243-
; GFX11-NEXT: ds_store_b8 v6, v5 offset:9
244-
; GFX11-NEXT: ds_store_b8_d16_hi v6, v2 offset:10
245-
; GFX11-NEXT: ds_store_b8_d16_hi v6, v5 offset:11
225+
; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v9, s3
226+
; GFX11-NEXT: v_dual_mov_b32 v10, s4 :: v_dual_mov_b32 v11, s0
227+
; GFX11-NEXT: v_mov_b32_e32 v12, s6
228+
; GFX11-NEXT: ds_store_b8 v1, v0
229+
; GFX11-NEXT: ds_store_b8 v1, v7 offset:1
230+
; GFX11-NEXT: ds_store_b8 v1, v4 offset:2
231+
; GFX11-NEXT: ds_store_b8 v1, v8 offset:3
232+
; GFX11-NEXT: ds_store_b8 v1, v2 offset:4
233+
; GFX11-NEXT: ds_store_b8 v1, v9 offset:5
234+
; GFX11-NEXT: ds_store_b8 v1, v5 offset:6
235+
; GFX11-NEXT: ds_store_b8 v1, v10 offset:7
236+
; GFX11-NEXT: ds_store_b8 v1, v3 offset:8
237+
; GFX11-NEXT: ds_store_b8 v1, v11 offset:9
238+
; GFX11-NEXT: ds_store_b8 v1, v6 offset:10
239+
; GFX11-NEXT: ds_store_b8 v1, v12 offset:11
246240
; GFX11-NEXT: s_endpgm
247241
store <3 x i32> %x, ptr addrspace(3) %out, align 1
248242
ret void

llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll

Lines changed: 72 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,8 @@
22
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s
33
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s
44
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
5-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
5+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-TRUE16 %s
6+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-FAKE16 %s
67

78
declare i32 @llvm.amdgcn.workitem.id.x() #0
89

@@ -227,14 +228,22 @@ define amdgpu_kernel void @add_x_shl_max_offset() #1 {
227228
; GFX10-NEXT: ds_write_b8 v0, v1 offset:65535
228229
; GFX10-NEXT: s_endpgm
229230
;
230-
; GFX11-LABEL: add_x_shl_max_offset:
231-
; GFX11: ; %bb.0:
232-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
233-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
234-
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 4, v0
235-
; GFX11-NEXT: v_mov_b16_e32 v0.l, 13
236-
; GFX11-NEXT: ds_store_b8 v1, v0 offset:65535
237-
; GFX11-NEXT: s_endpgm
231+
; GFX11-TRUE16-LABEL: add_x_shl_max_offset:
232+
; GFX11-TRUE16: ; %bb.0:
233+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
234+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
235+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 4, v0
236+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13
237+
; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0 offset:65535
238+
; GFX11-TRUE16-NEXT: s_endpgm
239+
;
240+
; GFX11-FAKE16-LABEL: add_x_shl_max_offset:
241+
; GFX11-FAKE16: ; %bb.0:
242+
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
243+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
244+
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 4, v0
245+
; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1 offset:65535
246+
; GFX11-FAKE16-NEXT: s_endpgm
238247
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x()
239248
%shl = shl i32 %x.i, 4
240249
%add = add i32 %shl, 65535
@@ -272,15 +281,24 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 {
272281
; GFX10-NEXT: ds_write_b8 v0, v1
273282
; GFX10-NEXT: s_endpgm
274283
;
275-
; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
276-
; GFX11: ; %bb.0:
277-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
278-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
279-
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
280-
; GFX11-NEXT: v_xor_b32_e32 v1, 0xffff, v0
281-
; GFX11-NEXT: v_mov_b16_e32 v0.l, 13
282-
; GFX11-NEXT: ds_store_b8 v1, v0
283-
; GFX11-NEXT: s_endpgm
284+
; GFX11-TRUE16-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
285+
; GFX11-TRUE16: ; %bb.0:
286+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
287+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
288+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
289+
; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0xffff, v0
290+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13
291+
; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0
292+
; GFX11-TRUE16-NEXT: s_endpgm
293+
;
294+
; GFX11-FAKE16-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
295+
; GFX11-FAKE16: ; %bb.0:
296+
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
297+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
298+
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
299+
; GFX11-FAKE16-NEXT: v_xor_b32_e32 v0, 0xffff, v0
300+
; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1
301+
; GFX11-FAKE16-NEXT: s_endpgm
284302
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x()
285303
%.neg = mul i32 %x.i, -4
286304
%add = add i32 %.neg, 65535
@@ -318,15 +336,24 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() #1 {
318336
; GFX10-NEXT: ds_write_b8 v0, v1
319337
; GFX10-NEXT: s_endpgm
320338
;
321-
; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
322-
; GFX11: ; %bb.0:
323-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
324-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
325-
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
326-
; GFX11-NEXT: v_xor_b32_e32 v1, 0xffff, v0
327-
; GFX11-NEXT: v_mov_b16_e32 v0.l, 13
328-
; GFX11-NEXT: ds_store_b8 v1, v0
329-
; GFX11-NEXT: s_endpgm
339+
; GFX11-TRUE16-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
340+
; GFX11-TRUE16: ; %bb.0:
341+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
342+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
343+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
344+
; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0xffff, v0
345+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13
346+
; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0
347+
; GFX11-TRUE16-NEXT: s_endpgm
348+
;
349+
; GFX11-FAKE16-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
350+
; GFX11-FAKE16: ; %bb.0:
351+
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
352+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
353+
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
354+
; GFX11-FAKE16-NEXT: v_xor_b32_e32 v0, 0xffff, v0
355+
; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1
356+
; GFX11-FAKE16-NEXT: s_endpgm
330357
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
331358
%neg = sub i32 0, %x.i
332359
%shl = shl i32 %neg, 2
@@ -362,15 +389,24 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
362389
; GFX10-NEXT: ds_write_b8 v0, v1
363390
; GFX10-NEXT: s_endpgm
364391
;
365-
; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
366-
; GFX11: ; %bb.0:
367-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
368-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
369-
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
370-
; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0x10000, v0
371-
; GFX11-NEXT: v_mov_b16_e32 v0.l, 13
372-
; GFX11-NEXT: ds_store_b8 v1, v0
373-
; GFX11-NEXT: s_endpgm
392+
; GFX11-TRUE16-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
393+
; GFX11-TRUE16: ; %bb.0:
394+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
395+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
396+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
397+
; GFX11-TRUE16-NEXT: v_sub_nc_u32_e32 v1, 0x10000, v0
398+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13
399+
; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0
400+
; GFX11-TRUE16-NEXT: s_endpgm
401+
;
402+
; GFX11-FAKE16-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
403+
; GFX11-FAKE16: ; %bb.0:
404+
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
405+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
406+
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
407+
; GFX11-FAKE16-NEXT: v_sub_nc_u32_e32 v0, 0x10000, v0
408+
; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1
409+
; GFX11-FAKE16-NEXT: s_endpgm
374410
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
375411
%neg = sub i32 0, %x.i
376412
%shl = shl i32 %neg, 2

0 commit comments

Comments
 (0)