Skip to content

Commit 78a1088

Browse files
committed
fix test
1 parent b2c7a76 commit 78a1088

File tree

4 files changed

+61
-75
lines changed

4 files changed

+61
-75
lines changed

llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -188,7 +188,8 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
188188
OutMI.addOperand(Src);
189189
return;
190190
} else if (const auto *Info = AMDGPU::getT16D16Helper(Opcode)) {
191-
uint16_t OpName = AMDGPU::OpName::OPERAND_LAST;
191+
const SIRegisterInfo &TRI = TII->getRegisterInfo();
192+
llvm::AMDGPU::OpName OpName;
192193
if (TII->isDS(Opcode)) {
193194
if (MI->mayLoad())
194195
OpName = llvm::AMDGPU::OpName::vdst;

llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll

Lines changed: 28 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -239,53 +239,48 @@ define amdgpu_kernel void @store_lds_v4i32_align1(ptr addrspace(3) %out, <4 x i3
239239
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
240240
; GFX11-NEXT: s_and_b32 s6, 0xffff, s0
241241
; GFX11-NEXT: s_lshr_b32 s5, s0, 16
242-
; GFX11-NEXT: v_mov_b16_e32 v0.l, s0
243-
; GFX11-NEXT: v_mov_b32_e32 v5, s4
242+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s4
244243
; GFX11-NEXT: s_lshr_b32 s0, s1, 16
245244
; GFX11-NEXT: s_and_b32 s4, 0xffff, s1
246-
; GFX11-NEXT: v_mov_b16_e32 v0.h, s1
245+
; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v3, s2
247246
; GFX11-NEXT: s_lshr_b32 s1, s2, 16
248247
; GFX11-NEXT: s_and_b32 s7, 0xffff, s2
249-
; GFX11-NEXT: v_mov_b16_e32 v1.l, s2
250248
; GFX11-NEXT: s_lshr_b32 s2, s6, 8
251249
; GFX11-NEXT: s_lshr_b32 s6, s5, 8
252-
; GFX11-NEXT: v_mov_b16_e32 v2.h, s2
253-
; GFX11-NEXT: v_mov_b16_e32 v1.h, s5
250+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
251+
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s6
252+
; GFX11-NEXT: v_dual_mov_b32 v4, s5 :: v_dual_mov_b32 v5, s0
254253
; GFX11-NEXT: s_lshr_b32 s4, s4, 8
255254
; GFX11-NEXT: s_lshr_b32 s5, s0, 8
256-
; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
257255
; GFX11-NEXT: s_lshr_b32 s0, s7, 8
258-
; GFX11-NEXT: v_mov_b16_e32 v3.l, s6
259-
; GFX11-NEXT: v_mov_b16_e32 v3.h, s4
260-
; GFX11-NEXT: v_mov_b16_e32 v4.l, s5
261-
; GFX11-NEXT: ds_store_b8 v5, v0
262-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v2 offset:1
263-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v1 offset:2
264-
; GFX11-NEXT: ds_store_b8 v5, v3 offset:3
265-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v0 offset:4
266-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v3 offset:5
267-
; GFX11-NEXT: ds_store_b8 v5, v2 offset:6
268-
; GFX11-NEXT: ds_store_b8 v5, v4 offset:7
269-
; GFX11-NEXT: v_mov_b16_e32 v0.l, s0
256+
; GFX11-NEXT: v_dual_mov_b32 v8, s4 :: v_dual_mov_b32 v9, s5
257+
; GFX11-NEXT: ds_store_b8 v1, v0
258+
; GFX11-NEXT: ds_store_b8 v1, v6 offset:1
259+
; GFX11-NEXT: ds_store_b8 v1, v4 offset:2
260+
; GFX11-NEXT: ds_store_b8 v1, v7 offset:3
261+
; GFX11-NEXT: ds_store_b8 v1, v2 offset:4
262+
; GFX11-NEXT: ds_store_b8 v1, v8 offset:5
263+
; GFX11-NEXT: ds_store_b8 v1, v5 offset:6
264+
; GFX11-NEXT: ds_store_b8 v1, v9 offset:7
265+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v5, s3
270266
; GFX11-NEXT: s_lshr_b32 s0, s1, 8
271-
; GFX11-NEXT: v_mov_b16_e32 v0.h, s1
272-
; GFX11-NEXT: v_mov_b16_e32 v1.h, s0
267+
; GFX11-NEXT: v_mov_b32_e32 v2, s1
268+
; GFX11-NEXT: v_mov_b32_e32 v4, s0
273269
; GFX11-NEXT: s_and_b32 s0, 0xffff, s3
274270
; GFX11-NEXT: s_lshr_b32 s1, s3, 16
275271
; GFX11-NEXT: s_lshr_b32 s0, s0, 8
276-
; GFX11-NEXT: v_mov_b16_e32 v2.l, s3
277-
; GFX11-NEXT: v_mov_b16_e32 v2.h, s0
272+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
273+
; GFX11-NEXT: v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
278274
; GFX11-NEXT: s_lshr_b32 s0, s1, 8
279-
; GFX11-NEXT: v_mov_b16_e32 v3.l, s1
280-
; GFX11-NEXT: v_mov_b16_e32 v3.h, s0
281-
; GFX11-NEXT: ds_store_b8 v5, v1 offset:8
282-
; GFX11-NEXT: ds_store_b8 v5, v0 offset:9
283-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v0 offset:10
284-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v1 offset:11
285-
; GFX11-NEXT: ds_store_b8 v5, v2 offset:12
286-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v2 offset:13
287-
; GFX11-NEXT: ds_store_b8 v5, v3 offset:14
288-
; GFX11-NEXT: ds_store_b8_d16_hi v5, v3 offset:15
275+
; GFX11-NEXT: v_mov_b32_e32 v8, s0
276+
; GFX11-NEXT: ds_store_b8 v1, v3 offset:8
277+
; GFX11-NEXT: ds_store_b8 v1, v0 offset:9
278+
; GFX11-NEXT: ds_store_b8 v1, v2 offset:10
279+
; GFX11-NEXT: ds_store_b8 v1, v4 offset:11
280+
; GFX11-NEXT: ds_store_b8 v1, v5 offset:12
281+
; GFX11-NEXT: ds_store_b8 v1, v6 offset:13
282+
; GFX11-NEXT: ds_store_b8 v1, v7 offset:14
283+
; GFX11-NEXT: ds_store_b8 v1, v8 offset:15
289284
; GFX11-NEXT: s_endpgm
290285
store <4 x i32> %x, ptr addrspace(3) %out, align 1
291286
ret void

llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll

Lines changed: 19 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -207,42 +207,36 @@ define amdgpu_kernel void @store_lds_v3i32_align1(ptr addrspace(3) %out, <3 x i3
207207
; GFX11-NEXT: s_load_b32 s3, s[4:5], 0x0
208208
; GFX11-NEXT: s_and_b32 s5, 0xffff, s0
209209
; GFX11-NEXT: s_lshr_b32 s4, s0, 16
210-
; GFX11-NEXT: v_mov_b16_e32 v0.l, s0
211210
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
212-
; GFX11-NEXT: v_mov_b32_e32 v6, s3
211+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s3
213212
; GFX11-NEXT: s_lshr_b32 s0, s1, 16
214213
; GFX11-NEXT: s_and_b32 s3, 0xffff, s1
215-
; GFX11-NEXT: v_mov_b16_e32 v0.h, s1
214+
; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v3, s2
216215
; GFX11-NEXT: s_lshr_b32 s1, s2, 16
217216
; GFX11-NEXT: s_and_b32 s6, 0xffff, s2
218-
; GFX11-NEXT: v_mov_b16_e32 v1.l, s2
219217
; GFX11-NEXT: s_lshr_b32 s2, s5, 8
220218
; GFX11-NEXT: s_lshr_b32 s5, s4, 8
221-
; GFX11-NEXT: v_mov_b16_e32 v3.l, s2
222-
; GFX11-NEXT: v_mov_b16_e32 v1.h, s4
219+
; GFX11-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v7, s2
220+
; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s0
223221
; GFX11-NEXT: s_lshr_b32 s3, s3, 8
224222
; GFX11-NEXT: s_lshr_b32 s4, s0, 8
225-
; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
226223
; GFX11-NEXT: s_lshr_b32 s0, s6, 8
227224
; GFX11-NEXT: s_lshr_b32 s6, s1, 8
228-
; GFX11-NEXT: v_mov_b16_e32 v3.h, s5
229-
; GFX11-NEXT: v_mov_b16_e32 v2.h, s1
230-
; GFX11-NEXT: v_mov_b16_e32 v4.l, s3
231-
; GFX11-NEXT: v_mov_b16_e32 v4.h, s4
232-
; GFX11-NEXT: v_mov_b16_e32 v5.l, s0
233-
; GFX11-NEXT: v_mov_b16_e32 v5.h, s6
234-
; GFX11-NEXT: ds_store_b8 v6, v0
235-
; GFX11-NEXT: ds_store_b8 v6, v3 offset:1
236-
; GFX11-NEXT: ds_store_b8_d16_hi v6, v1 offset:2
237-
; GFX11-NEXT: ds_store_b8_d16_hi v6, v3 offset:3
238-
; GFX11-NEXT: ds_store_b8_d16_hi v6, v0 offset:4
239-
; GFX11-NEXT: ds_store_b8 v6, v4 offset:5
240-
; GFX11-NEXT: ds_store_b8 v6, v2 offset:6
241-
; GFX11-NEXT: ds_store_b8_d16_hi v6, v4 offset:7
242-
; GFX11-NEXT: ds_store_b8 v6, v1 offset:8
243-
; GFX11-NEXT: ds_store_b8 v6, v5 offset:9
244-
; GFX11-NEXT: ds_store_b8_d16_hi v6, v2 offset:10
245-
; GFX11-NEXT: ds_store_b8_d16_hi v6, v5 offset:11
225+
; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v9, s3
226+
; GFX11-NEXT: v_dual_mov_b32 v10, s4 :: v_dual_mov_b32 v11, s0
227+
; GFX11-NEXT: v_mov_b32_e32 v12, s6
228+
; GFX11-NEXT: ds_store_b8 v1, v0
229+
; GFX11-NEXT: ds_store_b8 v1, v7 offset:1
230+
; GFX11-NEXT: ds_store_b8 v1, v4 offset:2
231+
; GFX11-NEXT: ds_store_b8 v1, v8 offset:3
232+
; GFX11-NEXT: ds_store_b8 v1, v2 offset:4
233+
; GFX11-NEXT: ds_store_b8 v1, v9 offset:5
234+
; GFX11-NEXT: ds_store_b8 v1, v5 offset:6
235+
; GFX11-NEXT: ds_store_b8 v1, v10 offset:7
236+
; GFX11-NEXT: ds_store_b8 v1, v3 offset:8
237+
; GFX11-NEXT: ds_store_b8 v1, v11 offset:9
238+
; GFX11-NEXT: ds_store_b8 v1, v6 offset:10
239+
; GFX11-NEXT: ds_store_b8 v1, v12 offset:11
246240
; GFX11-NEXT: s_endpgm
247241
store <3 x i32> %x, ptr addrspace(3) %out, align 1
248242
ret void

llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll

Lines changed: 12 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -229,11 +229,10 @@ define amdgpu_kernel void @add_x_shl_max_offset() #1 {
229229
;
230230
; GFX11-LABEL: add_x_shl_max_offset:
231231
; GFX11: ; %bb.0:
232-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
232+
; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
233233
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
234-
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 4, v0
235-
; GFX11-NEXT: v_mov_b16_e32 v0.l, 13
236-
; GFX11-NEXT: ds_store_b8 v1, v0 offset:65535
234+
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
235+
; GFX11-NEXT: ds_store_b8 v0, v1 offset:65535
237236
; GFX11-NEXT: s_endpgm
238237
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x()
239238
%shl = shl i32 %x.i, 4
@@ -274,12 +273,11 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 {
274273
;
275274
; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
276275
; GFX11: ; %bb.0:
277-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
276+
; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
278277
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
279278
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
280-
; GFX11-NEXT: v_xor_b32_e32 v1, 0xffff, v0
281-
; GFX11-NEXT: v_mov_b16_e32 v0.l, 13
282-
; GFX11-NEXT: ds_store_b8 v1, v0
279+
; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0
280+
; GFX11-NEXT: ds_store_b8 v0, v1
283281
; GFX11-NEXT: s_endpgm
284282
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x()
285283
%.neg = mul i32 %x.i, -4
@@ -320,12 +318,11 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() #1 {
320318
;
321319
; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
322320
; GFX11: ; %bb.0:
323-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
321+
; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
324322
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
325323
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
326-
; GFX11-NEXT: v_xor_b32_e32 v1, 0xffff, v0
327-
; GFX11-NEXT: v_mov_b16_e32 v0.l, 13
328-
; GFX11-NEXT: ds_store_b8 v1, v0
324+
; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0
325+
; GFX11-NEXT: ds_store_b8 v0, v1
329326
; GFX11-NEXT: s_endpgm
330327
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
331328
%neg = sub i32 0, %x.i
@@ -364,12 +361,11 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
364361
;
365362
; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
366363
; GFX11: ; %bb.0:
367-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
364+
; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
368365
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
369366
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
370-
; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0x10000, v0
371-
; GFX11-NEXT: v_mov_b16_e32 v0.l, 13
372-
; GFX11-NEXT: ds_store_b8 v1, v0
367+
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0x10000, v0
368+
; GFX11-NEXT: ds_store_b8 v0, v1
373369
; GFX11-NEXT: s_endpgm
374370
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
375371
%neg = sub i32 0, %x.i

0 commit comments

Comments
 (0)