Skip to content

Commit 3294cdd

Browse files
authored
AMDGPU: Add test for mfma rewrite pass respecting optnone (#153025)
1 parent fdede21 commit 3294cdd

File tree

1 file changed

+35
-0
lines changed

1 file changed

+35
-0
lines changed

llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,40 @@
33

44
target triple = "amdgcn-amd-amdhsa"
55

6+
; Checks code generation for an MFMA in a function carrying attribute group #4
; (nounwind noinline optnone). The expected output keeps the MFMA in its VGPR
; form (result in v[0:1]) and then copies the result into a[0:1] with
; v_accvgpr_write_b32 for the inline asm use, i.e. the MFMA itself is not
; emitted with an AGPR destination.
; NOTE(review): per the test name, this is meant to show the VGPR->AGPR MFMA
; rewrite is skipped for optnone functions -- confirm against the pass.
define amdgpu_kernel void @respect_optnone(double %arg0, double %arg1, ptr addrspace(1) %ptr) #4 {
7+
; CHECK-LABEL: respect_optnone:
8+
; CHECK: ; %bb.0: ; %bb
9+
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
10+
; CHECK-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
11+
; CHECK-NEXT: s_nop 0
12+
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x10
13+
; CHECK-NEXT: s_mov_b32 s6, 0x3ff
14+
; CHECK-NEXT: v_and_b32_e64 v0, v0, s6
15+
; CHECK-NEXT: s_mov_b32 s6, 3
16+
; CHECK-NEXT: v_lshlrev_b32_e64 v0, s6, v0
17+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
18+
; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[4:5]
19+
; CHECK-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
20+
; CHECK-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
21+
; CHECK-NEXT: s_waitcnt vmcnt(0)
22+
; CHECK-NEXT: s_nop 0
23+
; CHECK-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3], v[4:5], v[0:1]
24+
; CHECK-NEXT: s_nop 5
25+
; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
26+
; CHECK-NEXT: v_accvgpr_write_b32 a1, v1
27+
; CHECK-NEXT: ;;#ASMSTART
28+
; CHECK-NEXT: ; use a[0:1]
29+
; CHECK-NEXT: ;;#ASMEND
30+
; CHECK-NEXT: s_endpgm
31+
bb:
32+
%id = call i32 @llvm.amdgcn.workitem.id.x()
33+
%gep = getelementptr double, ptr addrspace(1) %ptr, i32 %id
34+
; Load the double at %ptr[workitem id x]; it becomes the third (src2) operand
; of the MFMA below.
%src2 = load double, ptr addrspace(1) %gep
35+
; MFMA on the two kernel arguments plus the loaded value; the three trailing
; i32 immediates are all zero.
%mai = call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %arg0, double %arg1, double %src2, i32 0, i32 0, i32 0)
36+
; The "a" constraint consumes the MFMA result in a[0:1] (see the
; "; use a[0:1]" check line above).
call void asm sideeffect "; use $0", "a"(double %mai)
37+
ret void
38+
}
39+
640
define amdgpu_kernel void @test_mfma_f32_32x32x1f32_rewrite_vgpr_mfma(ptr addrspace(1) %arg) #0 {
741
; CHECK-LABEL: test_mfma_f32_32x32x1f32_rewrite_vgpr_mfma:
842
; CHECK: ; %bb.0: ; %bb
@@ -859,3 +893,4 @@ attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-p
859893
attributes #1 = { mustprogress nofree norecurse nounwind willreturn "amdgpu-waves-per-eu"="8,8" }
860894
attributes #2 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
861895
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
896+
; Attribute group referenced by @respect_optnone (as #4) to mark it optnone.
attributes #4 = { nounwind noinline optnone }

0 commit comments

Comments
 (0)