Skip to content

Commit 097fbb7

Browse files
committed
Reduce testcase, remove -global-isel, drop Attrs comments and check output.
1 parent 0a47cb5 commit 097fbb7

File tree

1 file changed

+43
-93
lines changed

1 file changed

+43
-93
lines changed
Lines changed: 43 additions & 93 deletions
Original file line number | Diff line number | Diff line change
@@ -1,93 +1,43 @@
1-
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -O1 -global-isel=true < %s
2-
3-
; Function Attrs: nounwind
4-
; Minimal kernel: the body is a single call to llvm.amdgcn.iglp.opt with the
; immediate 0 (carrying call-site attribute group #4), followed by a return.
define amdgpu_kernel void @test_iglp_opt() #0 {
5-
entry:
6-
call void @llvm.amdgcn.iglp.opt(i32 0) #4
7-
ret void
8-
}
9-
10-
; Function Attrs: nounwind
11-
; Kernel that calls llvm.amdgcn.iglp.opt with immediate 0, then:
;   1. loads five <32 x float> vectors from addrspace(3) starting at %in,
;   2. passes each loaded vector as the third operand of a separate
;      llvm.amdgcn.mfma.f32.32x32x1f32 call (scalar operands 1.0 and 2.0,
;      trailing immediates all 0),
;   3. stores the five results to addrspace(3) locations derived from %out.
; %in and %out are both marked noalias.
define amdgpu_kernel void @test_iglp_opt_mfma_gemm(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
12-
entry:
13-
call void @llvm.amdgcn.iglp.opt(i32 0)
14-
%idx = call i32 @llvm.amdgcn.workitem.id.x()
15-
; Load addresses are chained: the first is indexed from %in by the workitem
; id, and each later GEP is based on the previous load address rather than
; on %in.
%load.0.addr = getelementptr <32 x float>, ptr addrspace(3) %in, i32 %idx
16-
%load.0 = load <32 x float>, ptr addrspace(3) %load.0.addr, align 128
17-
%load.1.addr = getelementptr <32 x float>, ptr addrspace(3) %load.0.addr, i32 64
18-
%load.1 = load <32 x float>, ptr addrspace(3) %load.1.addr, align 128
19-
%load.2.addr = getelementptr <32 x float>, ptr addrspace(3) %load.1.addr, i32 128
20-
%load.2 = load <32 x float>, ptr addrspace(3) %load.2.addr, align 128
21-
%load.3.addr = getelementptr <32 x float>, ptr addrspace(3) %load.2.addr, i32 192
22-
%load.3 = load <32 x float>, ptr addrspace(3) %load.3.addr, align 128
23-
%load.4.addr = getelementptr <32 x float>, ptr addrspace(3) %load.3.addr, i32 256
24-
%load.4 = load <32 x float>, ptr addrspace(3) %load.4.addr, align 128
25-
; One MFMA call per loaded vector; the calls do not consume each other's
; results.
%mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.000000e+00, float 2.000000e+00, <32 x float> %load.0, i32 0, i32 0, i32 0)
26-
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.000000e+00, float 2.000000e+00, <32 x float> %load.1, i32 0, i32 0, i32 0)
27-
%mai.2 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.000000e+00, float 2.000000e+00, <32 x float> %load.2, i32 0, i32 0, i32 0)
28-
%mai.3 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.000000e+00, float 2.000000e+00, <32 x float> %load.3, i32 0, i32 0, i32 0)
29-
%mai.4 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.000000e+00, float 2.000000e+00, <32 x float> %load.4, i32 0, i32 0, i32 0)
30-
; Unlike the load addresses, every store address is computed directly from
; %out: the first uses the workitem id, the rest use constant indices.
%store.0.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 %idx
31-
store <32 x float> %mai.0, ptr addrspace(3) %store.0.addr, align 128
32-
%store.1.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 64
33-
store <32 x float> %mai.1, ptr addrspace(3) %store.1.addr, align 128
34-
%store.2.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 128
35-
store <32 x float> %mai.2, ptr addrspace(3) %store.2.addr, align 128
36-
%store.3.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 192
37-
store <32 x float> %mai.3, ptr addrspace(3) %store.3.addr, align 128
38-
%store.4.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 256
39-
store <32 x float> %mai.4, ptr addrspace(3) %store.4.addr, align 128
40-
ret void
41-
}
42-
43-
; Function Attrs: nounwind
44-
; Kernel that calls llvm.amdgcn.iglp.opt with immediate 1 and then performs
; five <32 x float> loads, five llvm.amdgcn.mfma.f32.32x32x1f32 calls and
; five <32 x float> stores. In addition it loads two <1 x i64> values,
; takes their unsigned remainder, and stores that result after the vector
; stores.
define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
45-
entry:
46-
call void @llvm.amdgcn.iglp.opt(i32 1)
47-
%idx = call i32 @llvm.amdgcn.workitem.id.x()
48-
; Load addresses are chained: each GEP after the first is based on the
; previous load address rather than on %in.
%load.0.addr = getelementptr <32 x float>, ptr addrspace(3) %in, i32 %idx
49-
%load.0 = load <32 x float>, ptr addrspace(3) %load.0.addr, align 128
50-
%load.1.addr = getelementptr <32 x float>, ptr addrspace(3) %load.0.addr, i32 64
51-
%load.1 = load <32 x float>, ptr addrspace(3) %load.1.addr, align 128
52-
%load.2.addr = getelementptr <32 x float>, ptr addrspace(3) %load.1.addr, i32 128
53-
%load.2 = load <32 x float>, ptr addrspace(3) %load.2.addr, align 128
54-
%load.3.addr = getelementptr <32 x float>, ptr addrspace(3) %load.2.addr, i32 192
55-
; Extra <1 x i64> load (align 8) from the same address that %load.3 reads.
%L = load <1 x i64>, ptr addrspace(3) %load.3.addr, align 8
56-
%load.3 = load <32 x float>, ptr addrspace(3) %load.3.addr, align 128
57-
%load.4.addr = getelementptr <32 x float>, ptr addrspace(3) %load.3.addr, i32 256
58-
; Extra <1 x i64> load (align 8) from the same address that %load.4 reads.
%L1 = load <1 x i64>, ptr addrspace(3) %load.4.addr, align 8
59-
%load.4 = load <32 x float>, ptr addrspace(3) %load.4.addr, align 128
60-
; Unsigned remainder of the two <1 x i64> loads; stored by the final store
; in this function.
%B = urem <1 x i64> %L, %L1
61-
%mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.000000e+00, float 2.000000e+00, <32 x float> %load.0, i32 0, i32 0, i32 0)
62-
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.000000e+00, float 2.000000e+00, <32 x float> %load.1, i32 0, i32 0, i32 0)
63-
%mai.2 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.000000e+00, float 2.000000e+00, <32 x float> %load.2, i32 0, i32 0, i32 0)
64-
%mai.3 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.000000e+00, float 2.000000e+00, <32 x float> %load.3, i32 0, i32 0, i32 0)
65-
%mai.4 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.000000e+00, float 2.000000e+00, <32 x float> %load.4, i32 0, i32 0, i32 0)
66-
; Every store address is computed directly from %out.
%store.0.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 %idx
67-
store <32 x float> %mai.0, ptr addrspace(3) %store.0.addr, align 128
68-
%store.1.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 64
69-
store <32 x float> %mai.1, ptr addrspace(3) %store.1.addr, align 128
70-
%store.2.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 128
71-
store <32 x float> %mai.2, ptr addrspace(3) %store.2.addr, align 128
72-
%store.3.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 192
73-
store <32 x float> %mai.3, ptr addrspace(3) %store.3.addr, align 128
74-
%store.4.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 256
75-
store <32 x float> %mai.4, ptr addrspace(3) %store.4.addr, align 128
76-
; %store.3.addr was already written with %mai.3 above; this 8-byte store of
; the urem result targets that same address.
store <1 x i64> %B, ptr addrspace(3) %store.3.addr, align 8
77-
ret void
78-
}
79-
80-
; Function Attrs: convergent nocallback nofree nounwind willreturn
81-
; Intrinsic declarations used by the kernels in this file, followed by the
; attribute groups they and the kernels reference.
declare void @llvm.amdgcn.iglp.opt(i32 immarg) #1
82-
83-
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
84-
declare i32 @llvm.amdgcn.workitem.id.x() #2
85-
86-
; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none)
87-
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32 immarg, i32 immarg, i32 immarg) #3
88-
89-
; #0 is attached to the three kernel definitions; #1, #2 and #3 are attached
; to the three intrinsic declarations above; #4 is attached to the
; llvm.amdgcn.iglp.opt call site in @test_iglp_opt.
attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1,256" }
90-
attributes #1 = { convergent nocallback nofree nounwind willreturn }
91-
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
92-
attributes #3 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
93-
attributes #4 = { convergent nounwind }
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -O1 < %s | FileCheck -check-prefix=GCN %s
3+
4+
; Reduced form of the test: the kernel takes a single <1 x i64> argument,
; calls llvm.amdgcn.iglp.opt with immediate 1, copies a <32 x float> through
; the null addrspace(3) pointer (load then store), and stores
; `urem zeroinitializer, %L1` to that same pointer.
; The check lines below are autogenerated (see the NOTE line at the top of
; the file), so they should be regenerated with the script rather than
; edited by hand. They expect eight 128-bit DS reads, the iglp_opt marker
; comment, eight 128-bit DS writes and one 64-bit DS write.
define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(<1 x i64> %L1) {
5+
; GCN-LABEL: test_iglp_opt_rev_mfma_gemm:
6+
; GCN: ; %bb.0: ; %entry
7+
; GCN-NEXT: v_mov_b32_e32 v32, 0
8+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
9+
; GCN-NEXT: ds_read_b128 v[28:31], v32 offset:112
10+
; GCN-NEXT: ds_read_b128 v[24:27], v32 offset:96
11+
; GCN-NEXT: ds_read_b128 v[20:23], v32 offset:80
12+
; GCN-NEXT: ds_read_b128 v[16:19], v32 offset:64
13+
; GCN-NEXT: ds_read_b128 v[0:3], v32
14+
; GCN-NEXT: ds_read_b128 v[4:7], v32 offset:16
15+
; GCN-NEXT: ds_read_b128 v[8:11], v32 offset:32
16+
; GCN-NEXT: ds_read_b128 v[12:15], v32 offset:48
17+
; GCN-NEXT: v_mov_b32_e32 v34, 0
18+
; GCN-NEXT: v_mov_b32_e32 v35, v34
19+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
20+
; GCN-NEXT: s_cmp_lg_u64 s[0:1], 0
21+
; GCN-NEXT: ; iglp_opt mask(0x00000001)
22+
; GCN-NEXT: ds_write_b128 v32, v[28:31] offset:112
23+
; GCN-NEXT: ds_write_b128 v32, v[24:27] offset:96
24+
; GCN-NEXT: ds_write_b128 v32, v[20:23] offset:80
25+
; GCN-NEXT: ds_write_b128 v32, v[16:19] offset:64
26+
; GCN-NEXT: ds_write_b128 v32, v[12:15] offset:48
27+
; GCN-NEXT: ds_write_b128 v32, v[8:11] offset:32
28+
; GCN-NEXT: ds_write_b128 v32, v[4:7] offset:16
29+
; GCN-NEXT: ds_write_b128 v32, v[0:3]
30+
; GCN-NEXT: ds_write_b64 v32, v[34:35]
31+
; GCN-NEXT: s_endpgm
32+
entry:
33+
call void @llvm.amdgcn.iglp.opt(i32 1)
34+
%load.4 = load <32 x float>, ptr addrspace(3) null, align 128
35+
; The dividend is the constant zero vector; only the divisor comes from the
; kernel argument.
%B = urem <1 x i64> zeroinitializer, %L1
36+
store <32 x float> %load.4, ptr addrspace(3) null, align 128
37+
; Second store to the same null addrspace(3) pointer, issued after the
; <32 x float> store.
store <1 x i64> %B, ptr addrspace(3) null, align 8
38+
ret void
39+
}
40+
41+
; Declaration of the only intrinsic the reduced kernel calls, and the single
; attribute group (#0) that the declaration references.
declare void @llvm.amdgcn.iglp.opt(i32 immarg) #0
42+
43+
attributes #0 = { convergent nocallback nofree nounwind willreturn }

0 commit comments

Comments
 (0)