-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU] Support for type inferring image load/store builtins for AMDGPU #140210
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: Rana Pratap Reddy (ranapratap55) Changes: This initial patch adds support for __builtin_amdgcn_image_load_2d_f32_i32, which lowers to the intrinsic llvm.amdgcn.image.load.2d.f32.i32. Full diff: https://github.com/llvm/llvm-project/pull/140210.diff 3 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 39fef9e4601f8..67045809fa726 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -635,5 +635,10 @@ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUi", "nc", "bitop3-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
+//===----------------------------------------------------------------------===//
+// Image builtins
+//===----------------------------------------------------------------------===//
+BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiV8i", "n")
+
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index ad012d98635ff..bca5954bdda8b 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -683,6 +683,29 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return Builder.CreateInsertElement(I0, A, 1);
}
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: {
+ llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext());
+ llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u);
+
+ llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1);
+ llvm::Value *arg0 = EmitScalarExpr(E->getArg(0));
+ llvm::Value *arg1 = EmitScalarExpr(E->getArg(1));
+ llvm::Value *arg2 = EmitScalarExpr(E->getArg(2));
+ llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0);
+ llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0);
+
+ SmallVector<Value*, 6> ArgTys;
+ ArgTys.push_back(imm0);
+ ArgTys.push_back(arg0);
+ ArgTys.push_back(arg1);
+ ArgTys.push_back(arg2);
+ ArgTys.push_back(imm1);
+ ArgTys.push_back(imm2);
+
+ llvm::CallInst *Call = Builder.CreateIntrinsic(RetTy, Intrinsic::amdgcn_image_load_2d, ArgTys);
+
+ return Call;
+ }
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load-2d-f32.c
new file mode 100644
index 0000000000000..78dab461c1f38
--- /dev/null
+++ b/clang/test/CodeGen/builtins-image-load-2d-f32.c
@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef int v8i __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0)
+// CHECK-NEXT: ret float [[TMP3]]
+//
+float test_builtin_image_load_2d(float f32, int i32, v8i veci32) {
+ // f32 is not used here; i32 is passed as both the first and the second builtin argument.
+ return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32);
+}
|
@llvm/pr-subscribers-backend-amdgpu Author: Rana Pratap Reddy (ranapratap55) Changes: This initial patch adds support for __builtin_amdgcn_image_load_2d_f32_i32, which lowers to the intrinsic llvm.amdgcn.image.load.2d.f32.i32. Full diff: https://github.com/llvm/llvm-project/pull/140210.diff 3 Files Affected:
<html>
<head>
<meta content="origin" name="referrer">
<title>Rate limit · GitHub</title>
<meta name="viewport" content="width=device-width">
<style type="text/css" media="screen">
body {
background-color: #f6f8fa;
color: rgba(0, 0, 0, 0.5);
font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
}
.c { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; }
a { text-decoration: none; }
a:hover { text-decoration: underline; }
h1 { color: #24292e; line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; }
p { margin: 20px 0 40px; }
#s { margin-top: 35px; }
#s a {
color: #666666;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
}
</style>
</head>
<body>
<div class="c">
<h1>Access has been restricted</h1>
<p>You have triggered a rate limit.<br><br>
Please wait a few minutes before you try again;<br>
in some cases this may take up to an hour.
</p>
<div id="s">
<a href="https://support.github.com">Contact Support</a> —
<a href="https://githubstatus.com">GitHub Status</a> —
<a href="https://twitter.com/githubstatus">@githubstatus</a>
</div>
</div>
</body>
</html>
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
730b6c9
to
d1571dd
Compare
@arsenm ping. |
@shiltian ping. |
If the idea here is to remove the need for https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/ockl/src/base-image-intrinsics.ll and https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/ockl/src/extended-image-intrinsics.ll then we do need the +extended-image-insts target feature attribute as indicated in those files. |
Adds a new builtin type for AMDGPU's image descriptor (rsrc) data type. This is required for #140210.
…(#160258) Adds a new builtin type for AMDGPU's image descriptor (rsrc) data type. This is required for llvm/llvm-project#140210.
Adds a new builtin type for AMDGPU's image descriptor (rsrc) data type. This is required for llvm#140210.
…ad/store and adds 'image-insts' feature
1e9cbed
to
46c833f
Compare
Rebased, and now using the __amdgpu_texture_t type for v8i32. |
bd7427c
to
92d4a35
Compare
92d4a35
to
44e69e1
Compare
8337bc5
to
fb0dc56
Compare
This PR introduces the builtins for amdgcn_image_load/store/sample,
moving them from device-libs (https://github.com/ROCm/llvm-project/blob/amd-staging/amd/device-libs/ockl/src/base-image-intrinsics.ll) to clang.