|
1 | 1 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GCN,GFX9GFX10 |
2 | 2 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GCN,GFX9GFX10 |
3 | 3 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-TRUE16 |
4 | | -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16 |
| 4 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16,GFX11 |
5 | 5 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=+real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-TRUE16 |
6 | 6 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16 |
| 7 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-TRUE16 |
| 8 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16,GFX12 |
7 | 9 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1251 -mattr=+real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-TRUE16 |
8 | 10 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1251 -mattr=-real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16 |
9 | 11 |
|
@@ -93,6 +95,21 @@ define amdgpu_kernel void @dpp_fadd_f16(ptr addrspace(1) %arg) { |
93 | 95 | ret void |
94 | 96 | } |
95 | 97 |
|
; Test dpp_src1_sgpr: an i8 add in which one operand is the shifted kernel
; argument and the other is that same value after it has been passed through
; llvm.amdgcn.update.dpp.
; The checks below accept two different selections for the add:
;   - under the GFX11 prefix (enabled only on the gfx1100 -real-true16 run
;     line), a v_add_nc_u16 without a DPP suffix whose first source is an SGPR;
;   - under the GFX12 prefix (enabled only on the gfx1200 -real-true16 run
;     line), a v_add_nc_u16_e64_dpp whose last source is an SGPR.
; NOTE(review): presumably this pins that the DPP mov is folded into the add
; only on targets where an SGPR is legal as src1 of a DPP instruction --
; confirm against the DPP combine pass.
| 98 | +; GCN-LABEL: {{^}}dpp_src1_sgpr: |
| 99 | +; GFX11: v_add_nc_u16 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} |
| 100 | +; GFX12: v_add_nc_u16_e64_dpp {{v[0-9]+}}, {{v[0-9]+}}, {{s[0-9]+}} |
| 101 | +define amdgpu_kernel void @dpp_src1_sgpr(ptr addrspace(1) %out, i32 %in) { |
; Narrow the i32 kernel argument to i8, shift it left by 3, then sign-extend
; the shifted value back to i32 so it can be fed to the i32 DPP intrinsic.
| 102 | + %5 = trunc i32 %in to i8 |
| 103 | + %6 = shl i8 %5, 3 |
| 104 | + %7 = sext i8 %6 to i32 |
; update.dpp is called with a poison first operand and constant control
; operands (280, 15, 15, true).
; NOTE(review): operand roles (old, src, dpp_ctrl, row_mask, bank_mask,
; bound_ctrl) are not visible in this file -- confirm against the AMDGPU
; intrinsic documentation.
| 105 | + %8 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %7, i32 280, i32 15, i32 15, i1 true) |
; Add the truncated DPP result to the pre-DPP shifted value (%6) in i8, then
; sign-extend the sum to i32 and store it to %out.
| 106 | + %9 = trunc i32 %8 to i8 |
| 107 | + %10 = add i8 %6, %9 |
| 108 | + %11 = sext i8 %10 to i32 |
| 109 | + store i32 %11, ptr addrspace(1) %out |
| 110 | + ret void |
| 111 | +} |
| 112 | + |
96 | 113 | declare i32 @llvm.amdgcn.workitem.id.x() |
97 | 114 | declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0 |
98 | 115 | declare float @llvm.ceil.f32(float) |
|
0 commit comments