Skip to content

Commit 0ca6812

Browse files
Update the scriptable tests
1 parent fdf4f03 commit 0ca6812

17 files changed

+7769
-5605
lines changed

llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll

Lines changed: 173 additions & 257 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll

Lines changed: 45 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(ptr addrspace(1) %arg) #0 {
1414
; GFX908-LABEL: test_mfma_f32_32x32x2bf16:
1515
; GFX908: ; %bb.0: ; %bb
1616
; GFX908-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
17-
; GFX908-NEXT: v_mov_b32_e32 v4, 0
17+
; GFX908-NEXT: v_mov_b32_e32 v29, 0
1818
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
1919
; GFX908-NEXT: s_load_dwordx16 s[16:31], s[34:35], 0x0
2020
; GFX908-NEXT: s_load_dwordx16 s[0:15], s[34:35], 0x40
@@ -24,13 +24,11 @@ define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(ptr addrspace(1) %arg) #0 {
2424
; GFX908-NEXT: v_mov_b32_e32 v2, s18
2525
; GFX908-NEXT: v_accvgpr_write_b32 a0, v0
2626
; GFX908-NEXT: v_accvgpr_write_b32 a1, v1
27+
; GFX908-NEXT: v_mov_b32_e32 v0, s22
28+
; GFX908-NEXT: v_mov_b32_e32 v1, s23
2729
; GFX908-NEXT: v_accvgpr_write_b32 a2, v2
28-
; GFX908-NEXT: v_mov_b32_e32 v0, s21
29-
; GFX908-NEXT: v_mov_b32_e32 v1, s22
30-
; GFX908-NEXT: v_mov_b32_e32 v2, s23
31-
; GFX908-NEXT: v_accvgpr_write_b32 a5, v0
32-
; GFX908-NEXT: v_accvgpr_write_b32 a6, v1
33-
; GFX908-NEXT: v_accvgpr_write_b32 a7, v2
30+
; GFX908-NEXT: v_accvgpr_write_b32 a6, v0
31+
; GFX908-NEXT: v_accvgpr_write_b32 a7, v1
3432
; GFX908-NEXT: v_mov_b32_e32 v0, s24
3533
; GFX908-NEXT: v_mov_b32_e32 v1, s25
3634
; GFX908-NEXT: v_mov_b32_e32 v2, s26
@@ -71,7 +69,8 @@ define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(ptr addrspace(1) %arg) #0 {
7169
; GFX908-NEXT: v_mov_b32_e32 v0, s10
7270
; GFX908-NEXT: v_mov_b32_e32 v1, s11
7371
; GFX908-NEXT: v_mov_b32_e32 v2, s12
74-
; GFX908-NEXT: v_mov_b32_e32 v5, s20
72+
; GFX908-NEXT: v_mov_b32_e32 v4, s20
73+
; GFX908-NEXT: v_mov_b32_e32 v5, s21
7574
; GFX908-NEXT: v_accvgpr_write_b32 a3, v3
7675
; GFX908-NEXT: v_accvgpr_write_b32 a26, v0
7776
; GFX908-NEXT: v_accvgpr_write_b32 a27, v1
@@ -80,7 +79,8 @@ define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(ptr addrspace(1) %arg) #0 {
8079
; GFX908-NEXT: v_mov_b32_e32 v1, s14
8180
; GFX908-NEXT: v_mov_b32_e32 v2, s15
8281
; GFX908-NEXT: v_mov_b32_e32 v3, 1
83-
; GFX908-NEXT: v_accvgpr_write_b32 a4, v5
82+
; GFX908-NEXT: v_accvgpr_write_b32 a4, v4
83+
; GFX908-NEXT: v_accvgpr_write_b32 a5, v5
8484
; GFX908-NEXT: v_accvgpr_write_b32 a29, v0
8585
; GFX908-NEXT: v_accvgpr_write_b32 a30, v1
8686
; GFX908-NEXT: v_accvgpr_write_b32 a31, v2
@@ -94,57 +94,42 @@ define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(ptr addrspace(1) %arg) #0 {
9494
; GFX908-NEXT: v_accvgpr_read_b32 v2, a26
9595
; GFX908-NEXT: v_accvgpr_read_b32 v1, a25
9696
; GFX908-NEXT: v_accvgpr_read_b32 v0, a24
97-
; GFX908-NEXT: s_nop 1
98-
; GFX908-NEXT: global_store_dwordx4 v4, v[0:3], s[34:35] offset:96
99-
; GFX908-NEXT: s_nop 0
100-
; GFX908-NEXT: v_accvgpr_read_b32 v3, a31
101-
; GFX908-NEXT: v_accvgpr_read_b32 v2, a30
102-
; GFX908-NEXT: v_accvgpr_read_b32 v1, a29
103-
; GFX908-NEXT: v_accvgpr_read_b32 v0, a28
104-
; GFX908-NEXT: s_nop 1
105-
; GFX908-NEXT: global_store_dwordx4 v4, v[0:3], s[34:35] offset:112
106-
; GFX908-NEXT: s_nop 0
107-
; GFX908-NEXT: v_accvgpr_read_b32 v3, a19
108-
; GFX908-NEXT: v_accvgpr_read_b32 v2, a18
109-
; GFX908-NEXT: v_accvgpr_read_b32 v1, a17
110-
; GFX908-NEXT: v_accvgpr_read_b32 v0, a16
111-
; GFX908-NEXT: s_nop 1
112-
; GFX908-NEXT: global_store_dwordx4 v4, v[0:3], s[34:35] offset:64
113-
; GFX908-NEXT: s_nop 0
114-
; GFX908-NEXT: v_accvgpr_read_b32 v3, a23
115-
; GFX908-NEXT: v_accvgpr_read_b32 v2, a22
116-
; GFX908-NEXT: v_accvgpr_read_b32 v1, a21
117-
; GFX908-NEXT: v_accvgpr_read_b32 v0, a20
118-
; GFX908-NEXT: s_nop 1
119-
; GFX908-NEXT: global_store_dwordx4 v4, v[0:3], s[34:35] offset:80
120-
; GFX908-NEXT: s_nop 0
121-
; GFX908-NEXT: v_accvgpr_read_b32 v3, a11
122-
; GFX908-NEXT: v_accvgpr_read_b32 v2, a10
123-
; GFX908-NEXT: v_accvgpr_read_b32 v1, a9
124-
; GFX908-NEXT: v_accvgpr_read_b32 v0, a8
125-
; GFX908-NEXT: s_nop 1
126-
; GFX908-NEXT: global_store_dwordx4 v4, v[0:3], s[34:35] offset:32
127-
; GFX908-NEXT: s_nop 0
128-
; GFX908-NEXT: v_accvgpr_read_b32 v3, a15
129-
; GFX908-NEXT: v_accvgpr_read_b32 v2, a14
130-
; GFX908-NEXT: v_accvgpr_read_b32 v1, a13
131-
; GFX908-NEXT: v_accvgpr_read_b32 v0, a12
132-
; GFX908-NEXT: s_nop 1
133-
; GFX908-NEXT: global_store_dwordx4 v4, v[0:3], s[34:35] offset:48
134-
; GFX908-NEXT: s_nop 0
135-
; GFX908-NEXT: v_accvgpr_read_b32 v3, a3
136-
; GFX908-NEXT: v_accvgpr_read_b32 v2, a2
137-
; GFX908-NEXT: v_accvgpr_read_b32 v1, a1
138-
; GFX908-NEXT: v_accvgpr_read_b32 v0, a0
139-
; GFX908-NEXT: s_nop 1
140-
; GFX908-NEXT: global_store_dwordx4 v4, v[0:3], s[34:35]
141-
; GFX908-NEXT: s_nop 0
142-
; GFX908-NEXT: v_accvgpr_read_b32 v3, a7
143-
; GFX908-NEXT: v_accvgpr_read_b32 v2, a6
144-
; GFX908-NEXT: v_accvgpr_read_b32 v1, a5
145-
; GFX908-NEXT: v_accvgpr_read_b32 v0, a4
146-
; GFX908-NEXT: s_nop 1
147-
; GFX908-NEXT: global_store_dwordx4 v4, v[0:3], s[34:35] offset:16
97+
; GFX908-NEXT: v_accvgpr_read_b32 v4, a31
98+
; GFX908-NEXT: v_accvgpr_read_b32 v8, a19
99+
; GFX908-NEXT: global_store_dwordx4 v29, v[0:3], s[34:35] offset:96
100+
; GFX908-NEXT: v_accvgpr_read_b32 v12, a23
101+
; GFX908-NEXT: v_accvgpr_read_b32 v3, a30
102+
; GFX908-NEXT: v_accvgpr_read_b32 v2, a29
103+
; GFX908-NEXT: v_accvgpr_read_b32 v1, a28
104+
; GFX908-NEXT: v_accvgpr_read_b32 v16, a11
105+
; GFX908-NEXT: v_accvgpr_read_b32 v20, a15
106+
; GFX908-NEXT: v_accvgpr_read_b32 v24, a3
107+
; GFX908-NEXT: v_accvgpr_read_b32 v28, a7
108+
; GFX908-NEXT: v_accvgpr_read_b32 v7, a18
109+
; GFX908-NEXT: v_accvgpr_read_b32 v6, a17
110+
; GFX908-NEXT: v_accvgpr_read_b32 v5, a16
111+
; GFX908-NEXT: v_accvgpr_read_b32 v11, a22
112+
; GFX908-NEXT: v_accvgpr_read_b32 v10, a21
113+
; GFX908-NEXT: v_accvgpr_read_b32 v9, a20
114+
; GFX908-NEXT: v_accvgpr_read_b32 v15, a10
115+
; GFX908-NEXT: v_accvgpr_read_b32 v14, a9
116+
; GFX908-NEXT: v_accvgpr_read_b32 v13, a8
117+
; GFX908-NEXT: v_accvgpr_read_b32 v19, a14
118+
; GFX908-NEXT: v_accvgpr_read_b32 v18, a13
119+
; GFX908-NEXT: v_accvgpr_read_b32 v17, a12
120+
; GFX908-NEXT: v_accvgpr_read_b32 v23, a2
121+
; GFX908-NEXT: v_accvgpr_read_b32 v22, a1
122+
; GFX908-NEXT: v_accvgpr_read_b32 v21, a0
123+
; GFX908-NEXT: v_accvgpr_read_b32 v27, a6
124+
; GFX908-NEXT: v_accvgpr_read_b32 v26, a5
125+
; GFX908-NEXT: v_accvgpr_read_b32 v25, a4
126+
; GFX908-NEXT: global_store_dwordx4 v29, v[1:4], s[34:35] offset:112
127+
; GFX908-NEXT: global_store_dwordx4 v29, v[5:8], s[34:35] offset:64
128+
; GFX908-NEXT: global_store_dwordx4 v29, v[9:12], s[34:35] offset:80
129+
; GFX908-NEXT: global_store_dwordx4 v29, v[13:16], s[34:35] offset:32
130+
; GFX908-NEXT: global_store_dwordx4 v29, v[17:20], s[34:35] offset:48
131+
; GFX908-NEXT: global_store_dwordx4 v29, v[21:24], s[34:35]
132+
; GFX908-NEXT: global_store_dwordx4 v29, v[25:28], s[34:35] offset:16
148133
; GFX908-NEXT: s_endpgm
149134
;
150135
; GFX90A-LABEL: test_mfma_f32_32x32x2bf16:

0 commit comments

Comments
 (0)