Skip to content

Commit b9d9406

Browse files
committed
Fix up tests after rebase
1 parent 16ac7ac commit b9d9406

File tree

8 files changed

+100
-109
lines changed

8 files changed

+100
-109
lines changed

llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -329,11 +329,10 @@ define <2 x half> @chain_hi_to_lo_global() {
329329
; GFX11-TRUE16: ; %bb.0: ; %bb
330330
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
331331
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 2
332-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
332+
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
333+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 0
333334
; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off
334-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
335-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0
336-
; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v[1:2], off
335+
; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v[2:3], off
337336
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
338337
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
339338
;

llvm/test/CodeGen/AMDGPU/fptosi.f16.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -328,13 +328,13 @@ define amdgpu_kernel void @fptosi_v2f16_to_v2i16(
328328
; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
329329
; GFX11-TRUE16-NEXT: s_mov_b32 s5, s1
330330
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
331-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
332-
; GFX11-TRUE16-NEXT: v_cvt_i16_f16_e32 v0.l, v0.l
331+
; GFX11-TRUE16-NEXT: v_cvt_i16_f16_e32 v1.l, v0.l
332+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
333333
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
334-
; GFX11-TRUE16-NEXT: v_cvt_i16_f16_e32 v1.l, v1.l
335-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
334+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
335+
; GFX11-TRUE16-NEXT: v_cvt_i16_f16_e32 v0.l, v0.l
336336
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
337-
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
337+
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
338338
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
339339
; GFX11-TRUE16-NEXT: s_endpgm
340340
;

llvm/test/CodeGen/AMDGPU/fptoui.f16.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -327,13 +327,13 @@ define amdgpu_kernel void @fptoui_v2f16_to_v2i16(
327327
; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
328328
; GFX11-TRUE16-NEXT: s_mov_b32 s5, s1
329329
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
330-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
331-
; GFX11-TRUE16-NEXT: v_cvt_u16_f16_e32 v0.l, v0.l
330+
; GFX11-TRUE16-NEXT: v_cvt_u16_f16_e32 v1.l, v0.l
331+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
332332
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
333-
; GFX11-TRUE16-NEXT: v_cvt_u16_f16_e32 v1.l, v1.l
334-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
333+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
334+
; GFX11-TRUE16-NEXT: v_cvt_u16_f16_e32 v0.l, v0.l
335335
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
336-
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
336+
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
337337
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
338338
; GFX11-TRUE16-NEXT: s_endpgm
339339
;

llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1085,21 +1085,20 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
10851085
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10861086
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
10871087
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s1
1088-
; GFX11-TRUE16-NEXT: s_lshr_b32 s2, s0, 16
1089-
; GFX11-TRUE16-NEXT: s_lshr_b32 s3, s1, 16
1090-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, s2
1091-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, s3
10921088
; GFX11-TRUE16-NEXT: v_pk_max_f16 v2, s0, s1
1089+
; GFX11-TRUE16-NEXT: s_lshr_b32 s0, s0, 16
1090+
; GFX11-TRUE16-NEXT: s_lshr_b32 s1, s1, 16
1091+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
1092+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, s1
10931093
; GFX11-TRUE16-NEXT: v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
1094-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1094+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
10951095
; GFX11-TRUE16-NEXT: v_cmp_o_f16_e64 s0, v1.l, v1.h
1096-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2
1097-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v2.l, vcc_lo
1098-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1099-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s0
1100-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
1101-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1102-
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
1096+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v2.l, vcc_lo
1097+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
1098+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
1099+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1100+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v1.l, s0
1101+
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
11031102
; GFX11-TRUE16-NEXT: ;;#ASMSTART
11041103
; GFX11-TRUE16-NEXT: ; use v0
11051104
; GFX11-TRUE16-NEXT: ;;#ASMEND

llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -898,21 +898,20 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
898898
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
899899
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
900900
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, s1
901-
; GFX11-TRUE16-NEXT: s_lshr_b32 s2, s0, 16
902-
; GFX11-TRUE16-NEXT: s_lshr_b32 s3, s1, 16
903-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, s2
904-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, s3
905901
; GFX11-TRUE16-NEXT: v_pk_min_f16 v2, s0, s1
902+
; GFX11-TRUE16-NEXT: s_lshr_b32 s0, s0, 16
903+
; GFX11-TRUE16-NEXT: s_lshr_b32 s1, s1, 16
904+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
905+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, s1
906906
; GFX11-TRUE16-NEXT: v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
907-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
907+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
908908
; GFX11-TRUE16-NEXT: v_cmp_o_f16_e64 s0, v1.l, v1.h
909-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2
910-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v2.l, vcc_lo
911-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
912-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s0
913-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
914-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
915-
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
909+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v2.l, vcc_lo
910+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
911+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
912+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
913+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v1.l, s0
914+
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
916915
; GFX11-TRUE16-NEXT: ;;#ASMSTART
917916
; GFX11-TRUE16-NEXT: ; use v0
918917
; GFX11-TRUE16-NEXT: ;;#ASMEND

llvm/test/CodeGen/AMDGPU/load-constant-i16.ll

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -739,29 +739,25 @@ define amdgpu_kernel void @constant_load_v16i16_align2(ptr addrspace(4) %ptr0) #
739739
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v8, 0
740740
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
741741
; GFX12-TRUE16-NEXT: s_clause 0x7
742-
; GFX12-TRUE16-NEXT: global_load_d16_b16 v3, v8, s[0:1] offset:28
743742
; GFX12-TRUE16-NEXT: global_load_d16_b16 v2, v8, s[0:1] offset:24
744743
; GFX12-TRUE16-NEXT: global_load_d16_b16 v1, v8, s[0:1] offset:20
745744
; GFX12-TRUE16-NEXT: global_load_d16_b16 v0, v8, s[0:1] offset:16
746-
; GFX12-TRUE16-NEXT: global_load_d16_b16 v7, v8, s[0:1] offset:12
747745
; GFX12-TRUE16-NEXT: global_load_d16_b16 v6, v8, s[0:1] offset:8
748746
; GFX12-TRUE16-NEXT: global_load_d16_b16 v5, v8, s[0:1] offset:4
749747
; GFX12-TRUE16-NEXT: global_load_d16_b16 v4, v8, s[0:1]
750-
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
748+
; GFX12-TRUE16-NEXT: global_load_d16_b16 v3, v8, s[0:1] offset:28
749+
; GFX12-TRUE16-NEXT: global_load_d16_b16 v7, v8, s[0:1] offset:12
750+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x1
751+
; GFX12-TRUE16-NEXT: s_clause 0x3
751752
; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v3, v8, s[0:1] offset:30
752-
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
753753
; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v2, v8, s[0:1] offset:26
754-
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
755754
; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v1, v8, s[0:1] offset:22
756-
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
757755
; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v0, v8, s[0:1] offset:18
758-
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
756+
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x4
757+
; GFX12-TRUE16-NEXT: s_clause 0x3
759758
; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v7, v8, s[0:1] offset:14
760-
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
761759
; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v6, v8, s[0:1] offset:10
762-
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
763760
; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v5, v8, s[0:1] offset:6
764-
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
765761
; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v4, v8, s[0:1] offset:2
766762
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x4
767763
; GFX12-TRUE16-NEXT: global_store_b128 v[0:1], v[0:3], off

llvm/test/CodeGen/AMDGPU/select.f16.ll

Lines changed: 53 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -858,10 +858,10 @@ define amdgpu_kernel void @select_v2f16(
858858
; GFX11-TRUE16-NEXT: s_mov_b32 s23, s3
859859
; GFX11-TRUE16-NEXT: s_mov_b32 s18, s2
860860
; GFX11-TRUE16-NEXT: s_mov_b32 s19, s3
861-
; GFX11-TRUE16-NEXT: s_mov_b32 s6, s2
862-
; GFX11-TRUE16-NEXT: s_mov_b32 s7, s3
863861
; GFX11-TRUE16-NEXT: s_mov_b32 s26, s2
864862
; GFX11-TRUE16-NEXT: s_mov_b32 s27, s3
863+
; GFX11-TRUE16-NEXT: s_mov_b32 s6, s2
864+
; GFX11-TRUE16-NEXT: s_mov_b32 s7, s3
865865
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
866866
; GFX11-TRUE16-NEXT: s_mov_b32 s20, s12
867867
; GFX11-TRUE16-NEXT: s_mov_b32 s21, s13
@@ -871,8 +871,8 @@ define amdgpu_kernel void @select_v2f16(
871871
; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[16:19], 0
872872
; GFX11-TRUE16-NEXT: s_mov_b32 s24, s14
873873
; GFX11-TRUE16-NEXT: s_mov_b32 s25, s15
874-
; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[4:7], 0
875-
; GFX11-TRUE16-NEXT: buffer_load_b32 v3, off, s[24:27], 0
874+
; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[24:27], 0
875+
; GFX11-TRUE16-NEXT: buffer_load_b32 v3, off, s[4:7], 0
876876
; GFX11-TRUE16-NEXT: s_mov_b32 s1, s9
877877
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(3)
878878
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
@@ -881,15 +881,15 @@ define amdgpu_kernel void @select_v2f16(
881881
; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v1.l, v0.l
882882
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
883883
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2
884-
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
885-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v3
884+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
886885
; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e64 s0, v5.l, v4.l
887-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, v2.l, v3.l, vcc_lo
888-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
889-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v1.l, s0
890-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v2
886+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
887+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, v2.l, vcc_lo
888+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v3
889+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
890+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
891+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s0
891892
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s8
892-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
893893
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
894894
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
895895
; GFX11-TRUE16-NEXT: s_endpgm
@@ -1067,15 +1067,15 @@ define amdgpu_kernel void @select_v2f16_imm_a(
10671067
; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, 0.5, v0.l
10681068
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
10691069
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1070-
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
1071-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1070+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
10721071
; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e64 s0, 0x3900, v3.l
1072+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
10731073
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v2.l, v1.l, vcc_lo
1074-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1075-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v4.l, v0.l, s0
1074+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v2
10761075
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
1076+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1077+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s0
10771078
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s4
1078-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
10791079
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
10801080
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
10811081
; GFX11-TRUE16-NEXT: s_endpgm
@@ -1246,15 +1246,15 @@ define amdgpu_kernel void @select_v2f16_imm_b(
12461246
; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0.5, v0.l
12471247
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
12481248
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1
1249-
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
1250-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1249+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
12511250
; GFX11-TRUE16-NEXT: v_cmp_gt_f16_e64 s0, 0x3900, v3.l
1251+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
12521252
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v2.l, v1.l, vcc_lo
1253-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1254-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v4.l, v0.l, s0
1253+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v2
12551254
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
1255+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1256+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, s0
12561257
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s4
1257-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
12581258
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
12591259
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
12601260
; GFX11-TRUE16-NEXT: s_endpgm
@@ -1402,42 +1402,42 @@ define amdgpu_kernel void @select_v2f16_imm_c(
14021402
;
14031403
; GFX11-TRUE16-LABEL: select_v2f16_imm_c:
14041404
; GFX11-TRUE16: ; %bb.0: ; %entry
1405-
; GFX11-TRUE16-NEXT: s_load_b256 s[4:11], s[4:5], 0x24
1406-
; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1
1407-
; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
1408-
; GFX11-TRUE16-NEXT: s_mov_b32 s18, s2
1409-
; GFX11-TRUE16-NEXT: s_mov_b32 s19, s3
1410-
; GFX11-TRUE16-NEXT: s_mov_b32 s14, s2
1411-
; GFX11-TRUE16-NEXT: s_mov_b32 s15, s3
1412-
; GFX11-TRUE16-NEXT: s_mov_b32 s22, s2
1413-
; GFX11-TRUE16-NEXT: s_mov_b32 s23, s3
1405+
; GFX11-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
1406+
; GFX11-TRUE16-NEXT: s_mov_b32 s10, -1
1407+
; GFX11-TRUE16-NEXT: s_mov_b32 s11, 0x31016000
1408+
; GFX11-TRUE16-NEXT: s_mov_b32 s18, s10
1409+
; GFX11-TRUE16-NEXT: s_mov_b32 s19, s11
1410+
; GFX11-TRUE16-NEXT: s_mov_b32 s14, s10
1411+
; GFX11-TRUE16-NEXT: s_mov_b32 s15, s11
1412+
; GFX11-TRUE16-NEXT: s_mov_b32 s22, s10
1413+
; GFX11-TRUE16-NEXT: s_mov_b32 s23, s11
14141414
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1415-
; GFX11-TRUE16-NEXT: s_mov_b32 s16, s8
1416-
; GFX11-TRUE16-NEXT: s_mov_b32 s17, s9
1417-
; GFX11-TRUE16-NEXT: s_mov_b32 s12, s6
1418-
; GFX11-TRUE16-NEXT: s_mov_b32 s13, s7
1415+
; GFX11-TRUE16-NEXT: s_mov_b32 s16, s4
1416+
; GFX11-TRUE16-NEXT: s_mov_b32 s17, s5
1417+
; GFX11-TRUE16-NEXT: s_mov_b32 s12, s2
1418+
; GFX11-TRUE16-NEXT: s_mov_b32 s13, s3
14191419
; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[16:19], 0
14201420
; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
1421-
; GFX11-TRUE16-NEXT: s_mov_b32 s20, s10
1422-
; GFX11-TRUE16-NEXT: s_mov_b32 s21, s11
1423-
; GFX11-TRUE16-NEXT: s_mov_b32 s1, s5
1421+
; GFX11-TRUE16-NEXT: s_mov_b32 s20, s6
1422+
; GFX11-TRUE16-NEXT: s_mov_b32 s21, s7
1423+
; GFX11-TRUE16-NEXT: s_mov_b32 s8, s0
14241424
; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[20:23], 0
1425+
; GFX11-TRUE16-NEXT: s_mov_b32 s9, s1
14251426
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2)
14261427
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
14271428
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
1428-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v1
14291429
; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1.l, v0.l
1430+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1
14301431
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
1431-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2
1432-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1433-
; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e64 s0, v4.l, v3.l
14341432
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x3800, v2.l, vcc_lo
1435-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3900, v0.l, s0
1436-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1433+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1434+
; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0.l, v3.l
1435+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2
14371436
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
1438-
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s4
1437+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1438+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3900, v0.l, vcc_lo
14391439
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
1440-
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
1440+
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[8:11], 0
14411441
; GFX11-TRUE16-NEXT: s_endpgm
14421442
;
14431443
; GFX11-FAKE16-LABEL: select_v2f16_imm_c:
@@ -1590,34 +1590,32 @@ define amdgpu_kernel void @select_v2f16_imm_d(
15901590
; GFX11-TRUE16-NEXT: s_mov_b32 s19, s3
15911591
; GFX11-TRUE16-NEXT: s_mov_b32 s14, s2
15921592
; GFX11-TRUE16-NEXT: s_mov_b32 s15, s3
1593-
; GFX11-TRUE16-NEXT: s_mov_b32 s22, s2
1594-
; GFX11-TRUE16-NEXT: s_mov_b32 s23, s3
15951593
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
15961594
; GFX11-TRUE16-NEXT: s_mov_b32 s16, s8
15971595
; GFX11-TRUE16-NEXT: s_mov_b32 s17, s9
15981596
; GFX11-TRUE16-NEXT: s_mov_b32 s12, s6
15991597
; GFX11-TRUE16-NEXT: s_mov_b32 s13, s7
16001598
; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[16:19], 0
16011599
; GFX11-TRUE16-NEXT: buffer_load_b32 v1, off, s[12:15], 0
1602-
; GFX11-TRUE16-NEXT: s_mov_b32 s20, s10
1603-
; GFX11-TRUE16-NEXT: s_mov_b32 s21, s11
1600+
; GFX11-TRUE16-NEXT: s_mov_b32 s12, s10
1601+
; GFX11-TRUE16-NEXT: s_mov_b32 s13, s11
16041602
; GFX11-TRUE16-NEXT: s_mov_b32 s1, s5
1605-
; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[20:23], 0
1603+
; GFX11-TRUE16-NEXT: buffer_load_b32 v2, off, s[12:15], 0
16061604
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(2)
16071605
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
16081606
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
16091607
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v1
16101608
; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e32 vcc_lo, v1.l, v0.l
16111609
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
1612-
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2
1610+
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2
16131611
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
16141612
; GFX11-TRUE16-NEXT: v_cmp_lt_f16_e64 s0, v4.l, v3.l
1615-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x3800, v2.l, vcc_lo
1616-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3900, v0.l, s0
1613+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3800, v2.l, vcc_lo
1614+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x3900, v1.l, s0
16171615
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1618-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
1616+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
16191617
; GFX11-TRUE16-NEXT: s_mov_b32 s0, s4
1620-
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
1618+
; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
16211619
; GFX11-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
16221620
; GFX11-TRUE16-NEXT: s_endpgm
16231621
;

0 commit comments

Comments
 (0)