@@ -48,7 +48,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_1_1__cbsz1__blgp1(<8 x
4848; GCN-NEXT: v_accvgpr_write_b32 a2, v18
4949; GCN-NEXT: v_accvgpr_write_b32 a3, v19
5050; GCN-NEXT: s_nop 1
51- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
51+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[0,0,0]
5252; GCN-NEXT: s_nop 7
5353; GCN-NEXT: s_nop 3
5454; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -72,7 +72,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_2__cbsz1__blgp1(<8 x
7272; GCN-NEXT: v_accvgpr_write_b32 a2, v18
7373; GCN-NEXT: v_accvgpr_write_b32 a3, v19
7474; GCN-NEXT: s_nop 1
75- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
75+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[1,1,0]
7676; GCN-NEXT: s_nop 7
7777; GCN-NEXT: s_nop 3
7878; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -96,7 +96,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_3__cbsz1__blgp1(<8 x
9696; GCN-NEXT: v_accvgpr_write_b32 a2, v18
9797; GCN-NEXT: v_accvgpr_write_b32 a3, v19
9898; GCN-NEXT: s_nop 1
99- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
99+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,1,0]
100100; GCN-NEXT: s_nop 7
101101; GCN-NEXT: s_nop 3
102102; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -120,7 +120,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_3__cbsz1__blgp1(<8 x
120120; GCN-NEXT: v_accvgpr_write_b32 a2, v18
121121; GCN-NEXT: v_accvgpr_write_b32 a3, v19
122122; GCN-NEXT: s_nop 1
123- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
123+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[0,1,0]
124124; GCN-NEXT: s_nop 7
125125; GCN-NEXT: s_nop 3
126126; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -144,7 +144,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_0__cbsz1__blgp1(<8 x
144144; GCN-NEXT: v_accvgpr_write_b32 a2, v18
145145; GCN-NEXT: v_accvgpr_write_b32 a3, v19
146146; GCN-NEXT: s_nop 1
147- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
147+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,0,0]
148148; GCN-NEXT: s_nop 7
149149; GCN-NEXT: s_nop 3
150150; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -168,7 +168,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_3__cbsz1__blgp1(<8 x
168168; GCN-NEXT: v_accvgpr_write_b32 a2, v18
169169; GCN-NEXT: v_accvgpr_write_b32 a3, v19
170170; GCN-NEXT: s_nop 1
171- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
171+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[1,1,0]
172172; GCN-NEXT: s_nop 7
173173; GCN-NEXT: s_nop 3
174174; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -192,7 +192,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_2__cbsz1__blgp1(<8 x
192192; GCN-NEXT: v_accvgpr_write_b32 a2, v18
193193; GCN-NEXT: v_accvgpr_write_b32 a3, v19
194194; GCN-NEXT: s_nop 1
195- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
195+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,1,0]
196196; GCN-NEXT: s_nop 7
197197; GCN-NEXT: s_nop 3
198198; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1775,7 +1775,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__
17751775; GCN-NEXT: v_accvgpr_write_b32 a2, v18
17761776; GCN-NEXT: v_accvgpr_write_b32 a3, v19
17771777; GCN-NEXT: s_nop 1
1778- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[0,0,0]
1778+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[1,1,0]
17791779; GCN-NEXT: s_nop 7
17801780; GCN-NEXT: s_nop 3
17811781; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1797,7 +1797,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
17971797; SDAG-NEXT: v_accvgpr_write_b32 a2, v18
17981798; SDAG-NEXT: v_accvgpr_write_b32 a3, v19
17991799; SDAG-NEXT: s_nop 1
1800- ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[0,0,0]
1800+ ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[1,1,0]
18011801; SDAG-NEXT: s_nop 7
18021802; SDAG-NEXT: s_nop 3
18031803; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1815,7 +1815,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
18151815; GISEL-NEXT: v_accvgpr_write_b32 a3, v19
18161816; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
18171817; GISEL-NEXT: s_nop 1
1818- ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
1818+ ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[1,1,0]
18191819; GISEL-NEXT: s_nop 7
18201820; GISEL-NEXT: s_nop 3
18211821; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1838,7 +1838,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
18381838; SDAG-NEXT: v_accvgpr_write_b32 a3, v19
18391839; SDAG-NEXT: v_mov_b32_e32 v16, 0x4d
18401840; SDAG-NEXT: s_nop 1
1841- ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0]
1841+ ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[1,1,0]
18421842; SDAG-NEXT: s_nop 7
18431843; SDAG-NEXT: s_nop 3
18441844; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1857,7 +1857,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
18571857; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
18581858; GISEL-NEXT: v_mov_b32_e32 v17, 0x4d
18591859; GISEL-NEXT: s_nop 1
1860- ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0]
1860+ ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[1,1,0]
18611861; GISEL-NEXT: s_nop 7
18621862; GISEL-NEXT: s_nop 3
18631863; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1899,7 +1899,11 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
18991899; SDAG-NEXT: v_mov_b32_e32 v19, s3
19001900; SDAG-NEXT: v_mov_b32_e32 v21, s5
19011901; SDAG-NEXT: s_nop 1
1902+ <<<<<<< HEAD
19021903; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], s4, v21 op_sel_hi:[0,0,0] blgp:2
1904+ =======
1905+ ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
1906+ >>>>>>> f33f71291d5d... [AMDGPU] Fix opsel for scaled MFMA operations
19031907; SDAG-NEXT: s_nop 7
19041908; SDAG-NEXT: s_nop 3
19051909; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[6:7]
@@ -1922,8 +1926,13 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
19221926; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[22:23]
19231927; GISEL-NEXT: v_mov_b32_e32 v20, s25
19241928; GISEL-NEXT: s_nop 1
1929+ <<<<<<< HEAD
19251930; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], s24, v20 op_sel_hi:[0,0,0] blgp:2
19261931; GISEL-NEXT: v_mov_b32_e32 v4, 0
1932+ =======
1933+ ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
1934+ ; GISEL-NEXT: v_mov_b32_e32 v0, 0
1935+ >>>>>>> f33f71291d5d... [AMDGPU] Fix opsel for scaled MFMA operations
19271936; GISEL-NEXT: s_nop 7
19281937; GISEL-NEXT: s_nop 2
19291938; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[26:27]
@@ -1964,6 +1973,31 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
19641973; SDAG-NEXT: s_nop 0
19651974; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], s2, -2 op_sel_hi:[0,0,0]
19661975; SDAG-NEXT: s_waitcnt lgkmcnt(0)
1976+ <<<<<<< HEAD
1977+ =======
1978+ ; SDAG-NEXT: v_mov_b32_e32 v0, s8
1979+ ; SDAG-NEXT: v_mov_b32_e32 v1, s9
1980+ ; SDAG-NEXT: v_mov_b32_e32 v2, s10
1981+ ; SDAG-NEXT: v_mov_b32_e32 v3, s11
1982+ ; SDAG-NEXT: v_mov_b32_e32 v4, s12
1983+ ; SDAG-NEXT: v_mov_b32_e32 v5, s13
1984+ ; SDAG-NEXT: v_mov_b32_e32 v6, s14
1985+ ; SDAG-NEXT: v_mov_b32_e32 v7, s15
1986+ ; SDAG-NEXT: v_accvgpr_write_b32 a0, s0
1987+ ; SDAG-NEXT: v_mov_b32_e32 v8, s16
1988+ ; SDAG-NEXT: v_mov_b32_e32 v9, s17
1989+ ; SDAG-NEXT: v_mov_b32_e32 v10, s18
1990+ ; SDAG-NEXT: v_mov_b32_e32 v11, s19
1991+ ; SDAG-NEXT: v_mov_b32_e32 v12, s20
1992+ ; SDAG-NEXT: v_mov_b32_e32 v13, s21
1993+ ; SDAG-NEXT: v_mov_b32_e32 v14, s22
1994+ ; SDAG-NEXT: v_mov_b32_e32 v15, s23
1995+ ; SDAG-NEXT: v_accvgpr_write_b32 a1, s1
1996+ ; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
1997+ ; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
1998+ ; SDAG-NEXT: s_nop 1
1999+ ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
2000+ >>>>>>> f33f71291d5d... [AMDGPU] Fix opsel for scaled MFMA operations
19672001; SDAG-NEXT: s_nop 7
19682002; SDAG-NEXT: s_nop 2
19692003; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[0:1]
@@ -1987,8 +2021,13 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
19872021; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[18:19]
19882022; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[22:23]
19892023; GISEL-NEXT: s_nop 1
2024+ <<<<<<< HEAD
19902025; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v20, -2 op_sel_hi:[0,0,0]
19912026; GISEL-NEXT: v_mov_b32_e32 v4, 0
2027+ =======
2028+ ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
2029+ ; GISEL-NEXT: v_mov_b32_e32 v0, 0
2030+ >>>>>>> f33f71291d5d... [AMDGPU] Fix opsel for scaled MFMA operations
19922031; GISEL-NEXT: s_nop 7
19932032; GISEL-NEXT: s_nop 2
19942033; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
0 commit comments