|
1 | 1 | ; RUN: llc -mcpu=gfx942 < %s | FileCheck %s |
2 | | -; CHECK-LABEL: test_mfma |
3 | | -; CHECK: v_add_f32_e32 |
4 | | -; CHECK-NEXT: ;;#ASMSTART |
5 | | -; CHECK-NEXT: v_mfma_f64 |
6 | | -; CHECK-NEXT: ;;#ASMEND |
7 | | -; CHECK: v_add_f32_e32 |
8 | 2 | ; ModuleID = '<stdin>' |
9 | 3 | target triple = "amdgcn-amd-amdhsa" |
10 | 4 |
|
11 | 5 | ; Function Attrs: convergent mustprogress norecurse nounwind |
12 | 6 | define protected amdgpu_kernel void @test_valu(ptr addrspace(1) noalias noundef writeonly captures(none) %to.coerce, ptr addrspace(1) noalias noundef readonly captures(none) %from.coerce, i32 noundef %k, ptr addrspace(1) noundef writeonly captures(none) %ret.coerce, i32 noundef %length) local_unnamed_addr #0 { |
| 7 | +; CHECK-LABEL: test_valu |
| 8 | +; CHECK: s_mul_i32 |
| 9 | +; CHECK: ASMSTART |
13 | 10 | entry: |
14 | 11 | %0 = tail call i32 @llvm.amdgcn.workgroup.id.x() |
15 | 12 | %mul = shl i32 %0, 6 |
@@ -44,8 +41,55 @@ if.end: ; preds = %if.then, %entry |
44 | 41 | ret void |
45 | 42 | } |
46 | 43 |
|
| | +; Kernel that pairs an inline-asm s_mul_i32 with two sched.group.barrier calls. |
| | +; The FileCheck directives inside the function pin the expected output order in %bb.1: |
| | +; two s_load lines, then s_waitcnt, then the inline-asm start marker, on consecutive lines. |
| 44 | +; Function Attrs: convergent mustprogress norecurse nounwind |
| 45 | +define protected amdgpu_kernel void @test_salu(ptr addrspace(1) noalias noundef writeonly captures(none) %to.coerce, ptr addrspace(1) noalias noundef readonly captures(none) %from.coerce, i32 noundef %k, ptr addrspace(1) noundef writeonly captures(none) %ret.coerce, i32 noundef %length) local_unnamed_addr #0 { |
| 46 | +; CHECK-LABEL: test_salu |
| 47 | +; CHECK: %bb.1 |
| 48 | +; CHECK-NEXT: s_load |
| 49 | +; CHECK-NEXT: s_load |
| 50 | +; CHECK-NEXT: s_waitcnt |
| 51 | +; CHECK-NEXT: ASMSTART |
| 52 | +entry: |
| | + ; Index = workgroup.id.x * 64 + workitem.id.x (the shl by 6 is the multiply by 64). |
| 53 | + %0 = tail call i32 @llvm.amdgcn.workgroup.id.x() |
| 54 | + %mul = shl i32 %0, 6 |
| 55 | + %1 = tail call i32 @llvm.amdgcn.workitem.id.x() |
| 56 | + %add = add i32 %mul, %1 |
| | + ; Only indices below %length (signed compare) execute the body in if.then. |
| 57 | + %cmp = icmp slt i32 %add, %length |
| 58 | + br i1 %cmp, label %if.then, label %if.end |
| 59 | + |
| 60 | +if.then: ; preds = %entry |
| 61 | + %idx.ext = sext i32 %add to i64 |
| 62 | + %add.ptr = getelementptr inbounds float, ptr addrspace(1) %to.coerce, i64 %idx.ext |
| | + ; The source element index is 4 * %add, so each index reads its own <4 x float>. |
| 63 | + %mul4 = shl nsw i32 %add, 2 |
| 64 | + %idx.ext5 = sext i32 %mul4 to i64 |
| 65 | + %add.ptr6 = getelementptr inbounds float, ptr addrspace(1) %from.coerce, i64 %idx.ext5 |
| 66 | + %2 = load <4 x float>, ptr addrspace(1) %add.ptr6, align 16 |
| | + ; Sum the four lanes as (lane3 + lane0) + (lane1 + lane2) and store the result to %to.coerce[%add]. |
| 67 | + %3 = extractelement <4 x float> %2, i64 3 |
| 68 | + %4 = extractelement <4 x float> %2, i64 0 |
| 69 | + %5 = fadd contract float %3, %4 |
| 70 | + %6 = extractelement <4 x float> %2, i64 1 |
| 71 | + %7 = extractelement <4 x float> %2, i64 2 |
| 72 | + %add7 = fadd contract float %6, %7 |
| 73 | + %add8 = fadd contract float %5, %add7 |
| 74 | + store float %add8, ptr addrspace(1) %add.ptr, align 4 |
| | + ; Inline asm taking %k as its only input; its i32 result is stored to %ret.coerce. |
| | + ; The "=s,s" constraints presumably place both output and input in scalar registers - confirm against the AMDGPU constraint list. |
| | + ; NOTE(review): the comma directly after the s_mul_i32 mnemonic looks unusual for assembly syntax - confirm it is intentional. |
| 75 | + %mul9 = tail call noundef i32 asm "s_mul_i32, $0, $1, 3", "=s,s"(i32 %k) #3 |
| 76 | + store i32 %mul9, ptr addrspace(1) %ret.coerce, align 4 |
| | + ; Two scheduling-group requests: (mask 4, size 1, sync id 0) followed by (mask 2, size 10, sync id 0). |
| | + ; Presumably mask 4 selects SALU and mask 2 selects VALU, per the AMDGPU sched_group_barrier documentation - verify. |
| 77 | + tail call void @llvm.amdgcn.sched.group.barrier(i32 4, i32 1, i32 0) |
| 78 | + tail call void @llvm.amdgcn.sched.group.barrier(i32 2, i32 10, i32 0) |
| 79 | + br label %if.end |
| 80 | + |
| 81 | +if.end: ; preds = %if.then, %entry |
| 82 | + ret void |
| 83 | +} |
| 84 | + |
47 | 85 | ; Function Attrs: convergent mustprogress norecurse nounwind |
48 | 86 | define protected amdgpu_kernel void @test_mfma(ptr addrspace(1) noalias noundef writeonly captures(none) %to.coerce, ptr addrspace(1) noalias noundef readonly captures(none) %from.coerce, i32 noundef %length) local_unnamed_addr #0 { |
| 87 | +; CHECK-LABEL: test_mfma |
| 88 | +; CHECK: v_add_f32_e32 |
| 89 | +; CHECK-NEXT: ;;#ASMSTART |
| 90 | +; CHECK-NEXT: v_mfma_f64 |
| 91 | +; CHECK-NEXT: ;;#ASMEND |
| 92 | +; CHECK: v_add_f32_e32 |
49 | 93 | entry: |
50 | 94 | %0 = tail call i32 @llvm.amdgcn.workgroup.id.x() |
51 | 95 | %mul = shl i32 %0, 6 |
@@ -88,6 +132,10 @@ if.end: ; preds = %if.then, %entry |
88 | 132 |
|
89 | 133 | ; Function Attrs: convergent mustprogress norecurse nounwind |
90 | 134 | define protected amdgpu_kernel void @test_ds(ptr addrspace(1) noalias noundef writeonly captures(none) %to.coerce, ptr addrspace(1) noalias noundef readonly captures(none) %from.coerce, i32 noundef %length) local_unnamed_addr #0 { |
| 135 | +; CHECK-LABEL: test_ds |
| 136 | +; CHECK-DAG: v_lshl_add_u64 |
| 137 | +; CHECK-DAG: v_add_f32_e32 |
| 138 | +; CHECK-NEXT: ASMSTART |
91 | 139 | entry: |
92 | 140 | %0 = tail call i32 @llvm.amdgcn.workgroup.id.x() |
93 | 141 | %mul = shl i32 %0, 6 |
|
0 commit comments