Skip to content

Commit 23b3dfc

Browse files
[clang][amdgpu] Add builtins for raw/struct buffer lds load
1 parent 064f9d0 commit 23b3dfc

8 files changed

+102
-4
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,10 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n")
163163
BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
164164
BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
165165

166+
TARGET_BUILTIN(__builtin_amdgcn_raw_buffer_load_lds, "vV4Uiv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts")
166167
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts")
168+
TARGET_BUILTIN(__builtin_amdgcn_struct_buffer_load_lds, "vV4Uiv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts")
169+
TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts")
167170

168171
//===----------------------------------------------------------------------===//
169172
// Ballot builtins.

clang/lib/Sema/SemaAMDGPU.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,11 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
3535
Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap);
3636

3737
switch (BuiltinID) {
38+
case AMDGPU::BI__builtin_amdgcn_global_load_lds:
39+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_lds:
3840
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
39-
case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
41+
case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_lds:
42+
case AMDGPU::BI__builtin_amdgcn_struct_buffer_load_lds: {
4043
constexpr const int SizeIdx = 2;
4144
llvm::APSInt Size;
4245
Expr *ArgExpr = TheCall->getArg(SizeIdx);

clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,17 @@
22
// REQUIRES: amdgpu-registered-target
33
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s
44

5+
typedef unsigned int v4u32 __attribute__((ext_vector_type(4)));
6+
7+
// CHECK-LABEL: @test_amdgcn_raw_buffer_load_lds(
8+
// CHECK-NEXT: entry:
9+
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 [[OFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
10+
// CHECK-NEXT: ret void
11+
//
12+
// Calls the raw buffer-to-LDS builtin with a v4u32 resource. The size argument
// (index 2) is the literal 1 and the two trailing immediates are 2 and 3; the
// FileCheck assertions above require all three to appear unchanged in the call
// to the llvm.amdgcn.raw.buffer.load.lds intrinsic.
void test_amdgcn_raw_buffer_load_lds(v4u32 rsrc, __local void * lds, int offset, int soffset) {
13+
__builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3);
14+
}
15+
516
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_lds(
617
// CHECK-NEXT: entry:
718
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 [[OFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
@@ -10,3 +21,21 @@
1021
// Calls the raw_ptr buffer-to-LDS builtin, which takes the resource as
// __amdgpu_buffer_rsrc_t; the FileCheck assertions above expect it to reach the
// intrinsic as a ptr addrspace(8) operand, with the literals 1, 2 and 3 intact.
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int offset, int soffset) {
1122
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3);
1223
}
24+
25+
// CHECK-LABEL: @test_amdgcn_struct_buffer_load_lds(
26+
// CHECK-NEXT: entry:
27+
// CHECK-NEXT: tail call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 4, i32 [[VINDEX:%.*]], i32 [[VOFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
28+
// CHECK-NEXT: ret void
29+
//
30+
// Calls the struct buffer-to-LDS builtin with a v4u32 resource. Compared with
// the raw form it takes one extra runtime operand (vindex) ahead of voffset.
// The size passed to the builtin is the literal 4, because it must be a
// constant; the `size` parameter of this function is not used in the body.
void test_amdgcn_struct_buffer_load_lds(v4u32 rsrc, __local void * lds, int size, int vindex, int voffset, int soffset) {
31+
__builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, 2, 3);
32+
}
33+
34+
// CHECK-LABEL: @test_amdgcn_struct_ptr_buffer_load_lds(
35+
// CHECK-NEXT: entry:
36+
// CHECK-NEXT: tail call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 4, i32 [[VINDEX:%.*]], i32 [[VOFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
37+
// CHECK-NEXT: ret void
38+
//
39+
// Calls the struct_ptr buffer-to-LDS builtin, which takes the resource as
// __amdgpu_buffer_rsrc_t (a ptr addrspace(8) operand in the expected IR above).
// The size passed to the builtin is the literal 4, because it must be a
// constant; the `size` parameter of this function is not used in the body.
void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int size, int vindex, int voffset, int soffset) {
40+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, 2, 3);
41+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tahiti -S -verify -o - %s
2+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu bonaire -S -verify -o - %s
3+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu carrizo -S -verify -o - %s
4+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s
5+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S -verify -o - %s
6+
// REQUIRES: amdgpu-registered-target
7+
8+
typedef unsigned int v4u32 __attribute__((ext_vector_type(4)));
9+
10+
// Feature-gating test: for every target CPU named in the RUN lines of this
// file, an otherwise well-formed call (constant size 4, constant trailing
// arguments) must be diagnosed because the vmem-to-lds-load-insts feature is
// unavailable. The `x` parameter is not used in the body.
void test_amdgcn_raw_buffer_load_lds(v4u32 rsrc, __local void* lds, int offset, int soffset, int x) {
11+
__builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
12+
}

clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,32 @@
22
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify=gfx950,expected -o - %s
33
// REQUIRES: amdgpu-registered-target
44

5+
typedef unsigned int v4u32 __attribute__((ext_vector_type(4)));
6+
7+
// Negative tests for the v4u32-resource raw builtin: the size argument
// (index 2) and each of the two trailing arguments must be an integer
// constant, and a size of 3 is rejected with a target-specific note.
// rsrc is declared as v4u32 to match the builtin's "V4Ui" resource operand;
// __amdgpu_buffer_rsrc_t is the resource type of the raw_ptr variant only.
void test_amdgcn_raw_buffer_load_lds(v4u32 rsrc, __local void* lds, int offset, int soffset, int x) {
8+
__builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, x, offset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_lds' must be a constant integer}}
9+
__builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 4, offset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_lds' must be a constant integer}}
10+
__builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_lds' must be a constant integer}}
11+
__builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
12+
}
13+
514
// Negative tests for the raw_ptr builtin: passing the runtime value `x` as the
// size argument (index 2) or as either of the two trailing arguments must be
// diagnosed as a non-constant integer, and a constant size of 3 is rejected
// with a note whose list of legal sizes differs between gfx90a and gfx950.
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) {
615
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, x, offset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
716
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
817
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
918
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
1019
}
20+
21+
// Negative tests for the v4u32-resource struct builtin: passing the runtime
// value `x` as the size argument (index 2) or as either of the two trailing
// arguments must be diagnosed as a non-constant integer, and a constant size
// of 3 is rejected with a note whose list of legal sizes differs between
// gfx90a and gfx950.
void test_amdgcn_struct_buffer_load_lds(v4u32 rsrc, __local void* lds, int index, int offset, int soffset, int x) {
22+
__builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, x, index, offset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_struct_buffer_load_lds' must be a constant integer}}
23+
__builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 4, index, offset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_struct_buffer_load_lds' must be a constant integer}}
24+
__builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 4, index, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_struct_buffer_load_lds' must be a constant integer}}
25+
__builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 3, index, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
26+
}
27+
28+
// Negative tests for the struct_ptr builtin: passing the runtime value `x` as
// the size argument (index 2) or as either of the two trailing arguments must
// be diagnosed as a non-constant integer, and a constant size of 3 is rejected
// with a note whose list of legal sizes differs between gfx90a and gfx950.
void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int index, int offset, int soffset, int x) {
29+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, x, index, offset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
30+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, index, offset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
31+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, index, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
32+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 3, index, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
33+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tahiti -S -verify -o - %s
2+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu bonaire -S -verify -o - %s
3+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu carrizo -S -verify -o - %s
4+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s
5+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S -verify -o - %s
6+
// REQUIRES: amdgpu-registered-target
7+
8+
typedef unsigned int v4u32 __attribute__((ext_vector_type(4)));
9+
10+
// Feature-gating test: for every target CPU named in the RUN lines of this
// file, an otherwise well-formed call (constant size 4, constant trailing
// arguments) must be diagnosed because the vmem-to-lds-load-insts feature is
// unavailable. The `x` parameter is not used in the body.
void test_amdgcn_struct_buffer_load_lds(v4u32 rsrc, __local void* lds, int index, int offset, int soffset, int x) {
11+
__builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 4, index, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
12+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tahiti -S -verify -o - %s
2+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu bonaire -S -verify -o - %s
3+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu carrizo -S -verify -o - %s
4+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s
5+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S -verify -o - %s
6+
// REQUIRES: amdgpu-registered-target
7+
8+
// Feature-gating test: for every target CPU named in the RUN lines of this
// file, an otherwise well-formed call (constant size 4, constant trailing
// arguments) must be diagnosed because the vmem-to-lds-load-insts feature is
// unavailable. The `x` parameter is not used in the body.
void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int index, int offset, int soffset, int x) {
9+
__builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, index, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
10+
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1861,7 +1861,9 @@ def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic <
18611861
ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>], "", [SDNPMemOperand]>,
18621862
AMDGPURsrcIntrinsic<1>;
18631863

1864-
class AMDGPURawBufferLoadLDS : Intrinsic <
1864+
class AMDGPURawBufferLoadLDS :
1865+
ClangBuiltin<"__builtin_amdgcn_raw_buffer_load_lds">,
1866+
Intrinsic <
18651867
[],
18661868
[llvm_v4i32_ty, // rsrc(SGPR)
18671869
LLVMQualPointerType<3>, // LDS base offset
@@ -1904,7 +1906,9 @@ class AMDGPURawPtrBufferLoadLDS :
19041906
ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
19051907
def int_amdgcn_raw_ptr_buffer_load_lds : AMDGPURawPtrBufferLoadLDS;
19061908

1907-
class AMDGPUStructBufferLoadLDS : Intrinsic <
1909+
class AMDGPUStructBufferLoadLDS :
1910+
ClangBuiltin<"__builtin_amdgcn_struct_buffer_load_lds">,
1911+
Intrinsic <
19081912
[],
19091913
[llvm_v4i32_ty, // rsrc(SGPR)
19101914
LLVMQualPointerType<3>, // LDS base offset
@@ -1924,7 +1928,9 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
19241928
ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
19251929
def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS;
19261930

1927-
class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
1931+
class AMDGPUStructPtrBufferLoadLDS :
1932+
ClangBuiltin<"__builtin_amdgcn_struct_ptr_buffer_load_lds">,
1933+
Intrinsic <
19281934
[],
19291935
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
19301936
LLVMQualPointerType<3>, // LDS base offset

0 commit comments

Comments
 (0)