Skip to content

Commit 5624107

Browse files
committed
update tests
1 parent d306cad commit 5624107

File tree

2 files changed

+92
-89
lines changed

2 files changed

+92
-89
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll

Lines changed: 72 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -809,10 +809,10 @@ define i96 @v_mul_i96(i96 %num, i96 %den) {
809809
; GFX1250-NEXT: v_mad_u32 v9, v2, v3, v5
810810
; GFX1250-NEXT: v_mov_b32_e32 v8, v1
811811
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
812-
; GFX1250-NEXT: v_mad_nc_u64_u32 v[4:5], v6, v4, v[8:9]
813-
; GFX1250-NEXT: v_mad_nc_u64_u32 v[2:3], v7, v3, v[4:5]
812+
; GFX1250-NEXT: v_mad_nc_u64_u32 v[10:11], v6, v4, v[8:9]
813+
; GFX1250-NEXT: v_mad_nc_u64_u32 v[4:5], v7, v3, v[10:11]
814814
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
815-
; GFX1250-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v3
815+
; GFX1250-NEXT: v_dual_mov_b32 v1, v4 :: v_dual_mov_b32 v2, v5
816816
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
817817
%result = mul i96 %num, %den
818818
ret i96 %result
@@ -1218,16 +1218,16 @@ define i128 @v_mul_i128(i128 %num, i128 %den) {
12181218
; GFX1250-NEXT: v_mad_nc_u64_u32 v[10:11], v9, v5, v[0:1]
12191219
; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v8, v4, 0
12201220
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1221-
; GFX1250-NEXT: v_mad_nc_u64_u32 v[10:11], v2, v4, v[10:11]
1222-
; GFX1250-NEXT: v_mov_b32_e32 v12, v1
1221+
; GFX1250-NEXT: v_mad_nc_u64_u32 v[12:13], v2, v4, v[10:11]
1222+
; GFX1250-NEXT: v_mov_b32_e32 v10, v1
12231223
; GFX1250-NEXT: v_mul_lo_u32 v1, v9, v6
12241224
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
1225-
; GFX1250-NEXT: v_mov_b32_e32 v13, v10
1226-
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], vcc_lo, v8, v5, v[12:13]
1225+
; GFX1250-NEXT: v_mov_b32_e32 v11, v12
1226+
; GFX1250-NEXT: v_mad_co_u64_u32 v[14:15], vcc_lo, v8, v5, v[10:11]
12271227
; GFX1250-NEXT: v_mul_lo_u32 v8, v8, v7
12281228
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1229-
; GFX1250-NEXT: v_mad_co_u64_u32 v[6:7], s0, v9, v4, v[12:13]
1230-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v8, null, v11, v8, s0
1229+
; GFX1250-NEXT: v_mad_co_u64_u32 v[6:7], s0, v9, v4, v[14:15]
1230+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v8, null, v13, v8, s0
12311231
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
12321232
; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, null, v8, v1, vcc_lo
12331233
; GFX1250-NEXT: v_mad_u32 v1, v2, v5, v1
@@ -2874,86 +2874,87 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
28742874
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
28752875
; GFX1250-NEXT: s_wait_kmcnt 0x0
28762876
; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v0, v14, 0
2877-
; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v0, v12, 0
2878-
; GFX1250-NEXT: v_mul_lo_u32 v27, v5, v10
2879-
; GFX1250-NEXT: v_mul_lo_u32 v29, v3, v12
2880-
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2881-
; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v1, v13, v[16:17]
2882-
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s0, v1, v11, v[18:19]
2877+
; GFX1250-NEXT: v_mul_lo_u32 v30, v4, v11
2878+
; GFX1250-NEXT: v_mul_lo_u32 v29, v5, v10
2879+
; GFX1250-NEXT: v_mul_lo_u32 v31, v3, v12
2880+
; GFX1250-NEXT: v_mul_lo_u32 v32, v2, v13
2881+
; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v1, v13, v[16:17]
2882+
; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v0, v12, 0
2883+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2884+
; GFX1250-NEXT: v_mad_nc_u64_u32 v[20:21], v2, v12, v[18:19]
2885+
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s0, v1, v11, v[16:17]
28832886
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
2884-
; GFX1250-NEXT: v_cndmask_b32_e64 v20, 0, 1, s0
2885-
; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v2, v12, v[16:17]
2887+
; GFX1250-NEXT: v_cndmask_b32_e64 v22, 0, 1, s0
2888+
; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v3, v11, v[20:21]
28862889
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2887-
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], vcc_lo, v2, v10, v[18:19]
2888-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v22, null, 0, v20, vcc_lo
2889-
; GFX1250-NEXT: v_mad_nc_u64_u32 v[20:21], v0, v10, 0
2890-
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2891-
; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v3, v11, v[16:17]
2892-
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], vcc_lo, v3, v9, v[18:19]
2890+
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], vcc_lo, v2, v10, v[18:19]
2891+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v22, null, 0, v22, vcc_lo
2892+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2893+
; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v4, v10, v[16:17]
2894+
; GFX1250-NEXT: v_mad_co_u64_u32 v[16:17], vcc_lo, v3, v9, v[20:21]
28932895
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
2894-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v24, null, 0, v22, vcc_lo
2895-
; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v4, v10, v[16:17]
2896-
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2897-
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], vcc_lo, v4, v8, v[18:19]
2898-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v26, null, 0, v24, vcc_lo
2899-
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2900-
; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v5, v9, v[16:17]
2901-
; GFX1250-NEXT: v_mad_nc_u64_u32 v[22:23], v6, v8, v[16:17]
2902-
; GFX1250-NEXT: v_mad_co_u64_u32 v[16:17], s0, v1, v9, v[20:21]
2903-
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
2904-
; GFX1250-NEXT: v_dual_mov_b32 v20, v19 :: v_dual_mov_b32 v21, v22
2905-
; GFX1250-NEXT: v_mul_lo_u32 v22, v6, v9
2896+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v26, null, 0, v22, vcc_lo
2897+
; GFX1250-NEXT: v_mad_nc_u64_u32 v[20:21], v5, v9, v[18:19]
2898+
; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v0, v10, 0
2899+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
2900+
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], vcc_lo, v4, v8, v[16:17]
2901+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v28, null, 0, v26, vcc_lo
2902+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2903+
; GFX1250-NEXT: v_mad_nc_u64_u32 v[24:25], v6, v8, v[20:21]
2904+
; GFX1250-NEXT: v_mad_co_u64_u32 v[16:17], s0, v1, v9, v[18:19]
2905+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2906+
; GFX1250-NEXT: v_dual_mov_b32 v18, v23 :: v_dual_mov_b32 v19, v24
2907+
; GFX1250-NEXT: v_mul_lo_u32 v24, v6, v9
29062908
; GFX1250-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0
2907-
; GFX1250-NEXT: v_mad_co_u64_u32 v[24:25], s0, v2, v8, v[16:17]
2908-
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2909-
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], vcc_lo, v0, v13, v[20:21]
2909+
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], vcc_lo, v0, v13, v[18:19]
2910+
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s0, v2, v8, v[16:17]
2911+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
29102912
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, null, 0, v6, s0
2911-
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
29122913
; GFX1250-NEXT: v_mad_co_u64_u32 v[16:17], s0, v1, v12, v[20:21]
2913-
; GFX1250-NEXT: v_dual_mov_b32 v20, v25 :: v_dual_mov_b32 v21, v18
2914-
; GFX1250-NEXT: v_mul_lo_u32 v25, v4, v11
2915-
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2916-
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s2, v0, v11, v[20:21]
2917-
; GFX1250-NEXT: v_cndmask_b32_e64 v28, 0, 1, s2
2914+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
2915+
; GFX1250-NEXT: v_dual_mov_b32 v20, v19 :: v_dual_mov_b32 v21, v22
2916+
; GFX1250-NEXT: v_mov_b32_e32 v13, v18
2917+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
2918+
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], s2, v0, v11, v[20:21]
29182919
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], s1, v2, v11, v[16:17]
2920+
; GFX1250-NEXT: v_cndmask_b32_e64 v11, 0, 1, s2
29192921
; GFX1250-NEXT: v_mad_nc_u64_u32 v[16:17], v0, v8, 0
2920-
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2921-
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s2, v1, v10, v[18:19]
2922-
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s3, v3, v10, v[20:21]
2923-
; GFX1250-NEXT: v_mul_lo_u32 v20, v2, v13
2924-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v21, null, 0, v28, s2
2925-
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
2926-
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s2, v2, v9, v[18:19]
2927-
; GFX1250-NEXT: v_dual_mov_b32 v18, v17 :: v_dual_mov_b32 v19, v24
2928-
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s4, v4, v9, v[10:11]
2929-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v2, null, 0, v21, s2
2930-
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
2931-
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s6, v0, v9, v[18:19]
2922+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
2923+
; GFX1250-NEXT: v_mad_co_u64_u32 v[26:27], s2, v1, v10, v[22:23]
2924+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v33, null, 0, v11, s2
2925+
; GFX1250-NEXT: v_mad_co_u64_u32 v[22:23], s3, v3, v10, v[20:21]
2926+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2927+
; GFX1250-NEXT: v_mov_b32_e32 v12, v17
2928+
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s2, v2, v9, v[26:27]
2929+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
2930+
; GFX1250-NEXT: v_mad_co_u64_u32 v[20:21], s6, v0, v9, v[12:13]
2931+
; GFX1250-NEXT: v_mad_co_u64_u32 v[18:19], s4, v4, v9, v[22:23]
2932+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v2, null, 0, v33, s2
29322933
; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v15
2933-
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s2, v3, v8, v[12:13]
2934-
; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
29352934
; GFX1250-NEXT: v_mul_lo_u32 v9, v1, v14
2936-
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s5, v5, v8, v[10:11]
2935+
; GFX1250-NEXT: v_mad_co_u64_u32 v[12:13], s2, v3, v8, v[10:11]
2936+
; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
29372937
; GFX1250-NEXT: v_add_co_ci_u32_e64 v2, null, 0, v2, s2
2938-
; GFX1250-NEXT: v_mad_co_u64_u32 v[14:15], s2, v1, v8, v[18:19]
2938+
; GFX1250-NEXT: v_mad_co_u64_u32 v[10:11], s5, v5, v8, v[18:19]
2939+
; GFX1250-NEXT: v_mad_co_u64_u32 v[14:15], s2, v1, v8, v[20:21]
29392940
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
29402941
; GFX1250-NEXT: v_add_co_ci_u32_e64 v3, s2, v3, v12, s2
29412942
; GFX1250-NEXT: v_add_co_ci_u32_e64 v4, s2, v6, v13, s2
29422943
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
29432944
; GFX1250-NEXT: v_add_co_ci_u32_e64 v5, s2, v2, v10, s2
2944-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, s2, v26, v11, s2
2945+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v6, s2, v28, v11, s2
29452946
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2946-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v23, v0, s2
2947+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v25, v0, s2
29472948
; GFX1250-NEXT: v_dual_mov_b32 v2, v15 :: v_dual_mov_b32 v1, v14
29482949
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v0, v9, s5
29492950
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2950-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v0, v20, s4
2951-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v0, v29, s3
2951+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v0, v32, s4
2952+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v0, v31, s3
29522953
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2953-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v0, v25, s1
2954-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v0, v27, s0
2954+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v0, v30, s1
2955+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v0, v29, s0
29552956
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2956-
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v0, v22, vcc_lo
2957+
; GFX1250-NEXT: v_add_co_ci_u32_e64 v0, null, v0, v24, vcc_lo
29572958
; GFX1250-NEXT: v_mad_u32 v7, v7, v8, v0
29582959
; GFX1250-NEXT: v_mov_b32_e32 v0, v16
29592960
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -3018,9 +3019,9 @@ define amdgpu_ps void @s_mul_u64_zext_with_vregs(ptr addrspace(1) %out, ptr addr
30183019
;
30193020
; GFX1250-LABEL: s_mul_u64_zext_with_vregs:
30203021
; GFX1250: ; %bb.0:
3021-
; GFX1250-NEXT: global_load_b32 v2, v[2:3], off
3022+
; GFX1250-NEXT: global_load_b32 v4, v[2:3], off
30223023
; GFX1250-NEXT: s_wait_loadcnt 0x0
3023-
; GFX1250-NEXT: v_mad_nc_u64_u32 v[2:3], 0x50, v2, 0
3024+
; GFX1250-NEXT: v_mad_nc_u64_u32 v[2:3], 0x50, v4, 0
30243025
; GFX1250-NEXT: global_store_b64 v[0:1], v[2:3], off
30253026
; GFX1250-NEXT: s_endpgm
30263027
%val = load i32, ptr addrspace(1) %in, align 4
@@ -3212,9 +3213,9 @@ define amdgpu_ps void @s_mul_u64_sext_with_vregs(ptr addrspace(1) %out, ptr addr
32123213
;
32133214
; GFX1250-LABEL: s_mul_u64_sext_with_vregs:
32143215
; GFX1250: ; %bb.0:
3215-
; GFX1250-NEXT: global_load_b32 v2, v[2:3], off
3216+
; GFX1250-NEXT: global_load_b32 v4, v[2:3], off
32163217
; GFX1250-NEXT: s_wait_loadcnt 0x0
3217-
; GFX1250-NEXT: v_mad_nc_i64_i32 v[2:3], 0x50, v2, 0
3218+
; GFX1250-NEXT: v_mad_nc_i64_i32 v[2:3], 0x50, v4, 0
32183219
; GFX1250-NEXT: global_store_b64 v[0:1], v[2:3], off
32193220
; GFX1250-NEXT: s_endpgm
32203221
%val = load i32, ptr addrspace(1) %in, align 4

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3238,14 +3238,15 @@ define amdgpu_kernel void @test_mfma_i32_16x16x4i8_splatimm_src2_64(ptr addrspac
32383238
;
32393239
; GFX942-VGPR-LABEL: test_mfma_i32_16x16x4i8_splatimm_src2_64:
32403240
; GFX942-VGPR: ; %bb.0: ; %bb
3241-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1
3242-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2
3241+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 1
3242+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, 2
32433243
; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
3244+
; GFX942-VGPR-NEXT: s_nop 0
3245+
; GFX942-VGPR-NEXT: v_mfma_i32_16x16x4_4b_i8 v[0:15], v16, v17, 64 cbsz:1 abid:2 blgp:3
32443246
; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0
3245-
; GFX942-VGPR-NEXT: v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, 64 cbsz:1 abid:2 blgp:3
32463247
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
32473248
; GFX942-VGPR-NEXT: s_nop 7
3248-
; GFX942-VGPR-NEXT: s_nop 1
3249+
; GFX942-VGPR-NEXT: s_nop 0
32493250
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
32503251
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
32513252
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
@@ -4604,14 +4605,14 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm_splat(ptr addrspace(1) %
46044605
;
46054606
; GFX942-VGPR-LABEL: test_mfma_f32_16x16x1f32_imm_splat:
46064607
; GFX942-VGPR: ; %bb.0: ; %bb
4607-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0
4608-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2.0
4608+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 1.0
4609+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, 2.0
46094610
; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
4611+
; GFX942-VGPR-NEXT: s_nop 0
4612+
; GFX942-VGPR-NEXT: v_mfma_f32_16x16x1_4b_f32 v[0:15], v16, v17, 1.0
46104613
; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0
4611-
; GFX942-VGPR-NEXT: v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, 1.0
46124614
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
46134615
; GFX942-VGPR-NEXT: s_nop 7
4614-
; GFX942-VGPR-NEXT: s_nop 0
46154616
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
46164617
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
46174618
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
@@ -4760,16 +4761,17 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8f16_imm_splat(ptr addrspace(1) %
47604761
;
47614762
; GFX942-VGPR-LABEL: test_mfma_f32_32x32x8f16_imm_splat:
47624763
; GFX942-VGPR: ; %bb.0: ; %bb
4763-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0x3c003c00
4764-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0
4765-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, 0x40004000
4766-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v2
4764+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0x3c003c00
4765+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, v16
4766+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v18, 0x40004000
4767+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v19, v18
47674768
; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
4769+
; GFX942-VGPR-NEXT: s_nop 0
4770+
; GFX942-VGPR-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[16:17], v[18:19], 1.0
47684771
; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0
4769-
; GFX942-VGPR-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[0:1], v[2:3], 1.0
47704772
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
47714773
; GFX942-VGPR-NEXT: s_nop 7
4772-
; GFX942-VGPR-NEXT: s_nop 1
4774+
; GFX942-VGPR-NEXT: s_nop 0
47734775
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
47744776
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
47754777
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
@@ -4984,15 +4986,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm_splat(ptr addrspace(1) %
49844986
;
49854987
; GFX942-VGPR-LABEL: test_mfma_f32_32x32x1f32_imm_splat:
49864988
; GFX942-VGPR: ; %bb.0: ; %bb
4987-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0
4988-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2.0
4989+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v32, 1.0
4990+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v33, 2.0
49894991
; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
4992+
; GFX942-VGPR-NEXT: s_nop 0
4993+
; GFX942-VGPR-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v32, v33, 0
49904994
; GFX942-VGPR-NEXT: v_mov_b32_e32 v32, 0
4991-
; GFX942-VGPR-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, 0
49924995
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
49934996
; GFX942-VGPR-NEXT: s_nop 7
49944997
; GFX942-VGPR-NEXT: s_nop 7
4995-
; GFX942-VGPR-NEXT: s_nop 0
49964998
; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:112
49974999
; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:96
49985000
; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:80

0 commit comments

Comments
 (0)