|
3 | 3 |
|
; Module target triple: amdgcn architecture, amd vendor, amdhsa OS.
4 | 4 | target triple = "amdgcn-amd-amdhsa"
|
5 | 5 |
|
; Test @respect_optnone: an amdgpu_kernel that carries attribute group #4
; (nounwind noinline optnone).
;
; The body loads a double from %ptr indexed by the workitem id x, passes it as
; the third operand (%src2) of llvm.amdgcn.mfma.f64.4x4x4f64 together with
; %arg0 and %arg1, and hands the result to a side-effecting inline asm through
; an "a" register constraint.
;
; The expected assembly below has the MFMA reading and writing VGPRs v[0:1];
; the result is then copied into a0/a1 with v_accvgpr_write_b32 before the
; inline asm uses a[0:1].
;
; NOTE(review): judging by the function name, the intent is presumably that the
; VGPR-to-AGPR MFMA rewrite exercised by the neighbouring *_rewrite_vgpr_mfma
; tests is NOT applied to optnone functions -- confirm against the pass under
; test.
; NOTE(review): the CHECK lines look autogenerated; presumably they should be
; regenerated with the update script rather than hand-edited -- confirm from the
; file header, which is not visible here.
| 6 | +define amdgpu_kernel void @respect_optnone(double %arg0, double %arg1, ptr addrspace(1) %ptr) #4 { |
| 7 | +; CHECK-LABEL: respect_optnone: |
| 8 | +; CHECK: ; %bb.0: ; %bb |
| 9 | +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 |
| 10 | +; CHECK-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 |
| 11 | +; CHECK-NEXT: s_nop 0 |
| 12 | +; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x10 |
| 13 | +; CHECK-NEXT: s_mov_b32 s6, 0x3ff |
| 14 | +; CHECK-NEXT: v_and_b32_e64 v0, v0, s6 |
| 15 | +; CHECK-NEXT: s_mov_b32 s6, 3 |
| 16 | +; CHECK-NEXT: v_lshlrev_b32_e64 v0, s6, v0 |
| 17 | +; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| 18 | +; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[4:5] |
| 19 | +; CHECK-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| 20 | +; CHECK-NEXT: v_mov_b64_e32 v[4:5], s[2:3] |
| 21 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 22 | +; CHECK-NEXT: s_nop 0 |
| 23 | +; CHECK-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3], v[4:5], v[0:1] |
| 24 | +; CHECK-NEXT: s_nop 5 |
| 25 | +; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 |
| 26 | +; CHECK-NEXT: v_accvgpr_write_b32 a1, v1 |
| 27 | +; CHECK-NEXT: ;;#ASMSTART |
| 28 | +; CHECK-NEXT: ; use a[0:1] |
| 29 | +; CHECK-NEXT: ;;#ASMEND |
| 30 | +; CHECK-NEXT: s_endpgm |
| 31 | +bb: |
| 32 | + %id = call i32 @llvm.amdgcn.workitem.id.x() |
| 33 | + %gep = getelementptr double, ptr addrspace(1) %ptr, i32 %id |
| 34 | + %src2 = load double, ptr addrspace(1) %gep |
| 35 | + %mai = call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %arg0, double %arg1, double %src2, i32 0, i32 0, i32 0) |
| 36 | + call void asm sideeffect "; use $0", "a"(double %mai) |
| 37 | + ret void |
| 38 | +} |
| 39 | + |
6 | 40 | define amdgpu_kernel void @test_mfma_f32_32x32x1f32_rewrite_vgpr_mfma(ptr addrspace(1) %arg) #0 {
|
7 | 41 | ; CHECK-LABEL: test_mfma_f32_32x32x1f32_rewrite_vgpr_mfma:
|
8 | 42 | ; CHECK: ; %bb.0: ; %bb
|
@@ -859,3 +893,4 @@ attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-p
|
859 | 893 | attributes #1 = { mustprogress nofree norecurse nounwind willreturn "amdgpu-waves-per-eu"="8,8" }
|
860 | 894 | attributes #2 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
|
861 | 895 | attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
; Attribute group #4 is referenced by @respect_optnone; LLVM requires optnone
; to be paired with noinline, which is why both appear here.
| 896 | +attributes #4 = { nounwind noinline optnone } |
0 commit comments