Skip to content

Commit d1571dd

Browse files
committed
[WIP][AMDGPU] Support for type-inferring image load/store builtins for AMDGPU
1 parent 310ed2b commit d1571dd

File tree

3 files changed

+60
-0
lines changed

3 files changed

+60
-0
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,5 +635,10 @@ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUi", "nc", "bitop3-insts")
635635
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
636636
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
637637

638+
//===----------------------------------------------------------------------===//
639+
// Image builtins
640+
//===----------------------------------------------------------------------===//
641+
// 2D image load producing a single float. Type string "fiiV8i": the result is
// float and the operands are two ints followed by a vector of 8 ints.
// NOTE(review): the two ints are presumably the 2D coordinates and the vector
// the image resource descriptor -- confirm against the intrinsic definition.
BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiV8i", "n")
642+
638643
#undef BUILTIN
639644
#undef TARGET_BUILTIN

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
683683

684684
return Builder.CreateInsertElement(I0, A, 1);
685685
}
686+
case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: {
  // Lower the builtin to a call of the amdgcn_image_load_2d intrinsic with a
  // float result. The three source-level arguments are forwarded unchanged
  // and are bracketed by constant i32 operands: a leading 1 and two trailing
  // zeros.
  // NOTE(review): the constants presumably correspond to dmask, texfailctrl
  // and cachepolicy -- confirm against IntrinsicsAMDGPU.td.
  llvm::Type *FloatTy = Builder.getFloatTy();

  // Braced initialisation evaluates left to right, so the builtin arguments
  // are emitted in source order (0, 1, 2).
  llvm::Value *Operands[] = {
      Builder.getInt32(1),
      EmitScalarExpr(E->getArg(0)),
      EmitScalarExpr(E->getArg(1)),
      EmitScalarExpr(E->getArg(2)),
      Builder.getInt32(0),
      Builder.getInt32(0),
  };

  return Builder.CreateIntrinsic(FloatTy, Intrinsic::amdgcn_image_load_2d,
                                 Operands);
}
686710
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
687711
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
688712
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s
3+
4+
#pragma OPENCL EXTENSION cl_khr_fp64:enable
5+
6+
typedef int v8i __attribute__((ext_vector_type(8)));
7+
8+
// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
9+
// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] {
10+
// CHECK-NEXT: [[ENTRY:.*:]]
11+
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
12+
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
13+
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
14+
// CHECK-NEXT: [[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
15+
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
16+
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
17+
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
18+
// CHECK-NEXT: [[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr
19+
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
20+
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
21+
// CHECK-NEXT: store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32
22+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
23+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
24+
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32
25+
// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0)
26+
// CHECK-NEXT: ret float [[TMP3]]
27+
//
28+
// Calls the 2D image-load builtin with the same int for both integer operands
// and the 8 x i32 vector as the third operand. The f32 parameter is not used
// by the call; it is kept because the autogenerated CHECK lines above match
// this exact signature.
float test_builtin_image_load_2d(float f32, int i32, v8i veci32) {
29+
30+
return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32);
31+
}

0 commit comments

Comments
 (0)