Skip to content

Commit 9cc15d5

Browse files
committed
Fixed handling of sub-dword (narrower than 32-bit) and complex types, added tests
1 parent 5485732 commit 9cc15d5

File tree

4 files changed

+90
-6
lines changed

4 files changed

+90
-6
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19043,20 +19043,31 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1904319043
getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1904419044
assert(Error == ASTContext::GE_None && "Should not codegen an error");
1904519045
llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
19046+
unsigned Size = DataTy->getPrimitiveSizeInBits();
1904619047
llvm::Type *IntTy = llvm::IntegerType::get(
19047-
Builder.getContext(), DataTy->getPrimitiveSizeInBits());
19048+
Builder.getContext(), std::max(Size, 32u));
1904819049
Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, IntTy);
1904919050
assert(E->getNumArgs() == 5 || E->getNumArgs() == 6);
1905019051
bool InsertOld = E->getNumArgs() == 5;
1905119052
if (InsertOld)
1905219053
Args.push_back(llvm::PoisonValue::get(IntTy));
1905319054
for (unsigned I = 0; I != E->getNumArgs(); ++I) {
1905419055
llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
19056+
if (I <= !InsertOld && Size < 32) {
19057+
if (!DataTy->isIntegerTy())
19058+
V = Builder.CreateBitCast(
19059+
V, llvm::IntegerType::get(Builder.getContext(), Size));
19060+
V = Builder.CreateZExtOrBitCast(V, IntTy);
19061+
}
1905519062
llvm::Type *ExpTy =
1905619063
F->getFunctionType()->getFunctionParamType(I + InsertOld);
1905719064
Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
1905819065
}
19059-
return Builder.CreateBitCast(Builder.CreateCall(F, Args), DataTy);
19066+
Value *V = Builder.CreateCall(F, Args);
19067+
if (Size < 32 && !DataTy->isIntegerTy())
19068+
V = Builder.CreateTrunc(
19069+
V, llvm::IntegerType::get(Builder.getContext(), Size));
19070+
return Builder.CreateTruncOrBitCast(V, DataTy);
1906019071
}
1906119072
case AMDGPU::BI__builtin_amdgcn_permlane16:
1906219073
case AMDGPU::BI__builtin_amdgcn_permlanex16:

clang/lib/Sema/SemaAMDGPU.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
6868
return true;
6969
Expr *ValArg = TheCall->getArg(0);
7070
QualType Ty = ValArg->getType();
71-
if (!Ty->isArithmeticType()) {
71+
// TODO: Vector types could also be supported; they are rejected for now.
72+
if (!Ty->isArithmeticType() || Ty->isAnyComplexType()) {
7273
SemaRef.Diag(ValArg->getBeginLoc(),
7374
diag::err_typecheck_cond_expect_int_float)
7475
<< Ty << ValArg->getSourceRange();
@@ -82,7 +83,8 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
8283
for (unsigned I = 0; I != 2; ++I) {
8384
Expr *ValArg = TheCall->getArg(I);
8485
QualType Ty = ValArg->getType();
85-
if (!Ty->isArithmeticType()) {
86+
// TODO: Vector types could also be supported; they are rejected for now.
87+
if (!Ty->isArithmeticType() || Ty->isAnyComplexType()) {
8688
SemaRef.Diag(ValArg->getBeginLoc(),
8789
diag::err_typecheck_cond_expect_int_float)
8890
<< Ty << ValArg->getSourceRange();

clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,34 @@ void test_mov_dpp_double(double x, global double *p) {
132132
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
133133
}
134134

135+
// CHECK-LABEL: @test_mov_dpp_short
136+
// CHECK: %0 = zext i16 %x to i32
137+
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
138+
// CHECK-NEXT: %2 = trunc i32 %1 to i16
139+
// CHECK-NEXT: store i16 %2,
140+
void test_mov_dpp_short(short x, global short *p) {
141+
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
142+
}
143+
144+
// CHECK-LABEL: @test_mov_dpp_char
145+
// CHECK: %0 = zext i8 %x to i32
146+
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
147+
// CHECK-NEXT: %2 = trunc i32 %1 to i8
148+
// CHECK-NEXT: store i8 %2,
149+
void test_mov_dpp_char(char x, global char *p) {
150+
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
151+
}
152+
153+
// CHECK-LABEL: @test_mov_dpp_half
154+
// CHECK: %0 = load i16,
155+
// CHECK: %1 = zext i16 %0 to i32
156+
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %1, i32 257, i32 15, i32 15, i1 false)
157+
// CHECK-NEXT: %3 = trunc i32 %2 to i16
158+
// CHECK-NEXT: store i16 %3,
159+
void test_mov_dpp_half(half *x, global half *p) {
160+
*p = __builtin_amdgcn_mov_dpp(*x, 0x101, 0xf, 0xf, 0);
161+
}
162+
135163
// CHECK-LABEL: @test_update_dpp_int
136164
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
137165
void test_update_dpp_int(global int* out, int arg1, int arg2)
@@ -162,6 +190,34 @@ void test_update_dpp_double(double x, global double *p) {
162190
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
163191
}
164192

193+
// CHECK-LABEL: @test_update_dpp_short
194+
// CHECK: %0 = zext i16 %x to i32
195+
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
196+
// CHECK-NEXT: %2 = trunc i32 %1 to i16
197+
// CHECK-NEXT: store i16 %2,
198+
void test_update_dpp_short(short x, global short *p) {
199+
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
200+
}
201+
202+
// CHECK-LABEL: @test_update_dpp_char
203+
// CHECK: %0 = zext i8 %x to i32
204+
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
205+
// CHECK-NEXT: %2 = trunc i32 %1 to i8
206+
// CHECK-NEXT: store i8 %2,
207+
void test_update_dpp_char(char x, global char *p) {
208+
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
209+
}
210+
211+
// CHECK-LABEL: @test_update_dpp_half
212+
// CHECK: %0 = load i16,
213+
// CHECK: %1 = zext i16 %0 to i32
214+
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %1, i32 257, i32 15, i32 15, i1 false)
215+
// CHECK-NEXT: %3 = trunc i32 %2 to i16
216+
// CHECK-NEXT: store i16 %3,
217+
void test_update_dpp_half(half *x, global half *p) {
218+
*p = __builtin_amdgcn_update_dpp(*x, *x, 0x101, 0xf, 0xf, 0);
219+
}
220+
165221
// CHECK-LABEL: @test_ds_fadd
166222
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
167223
// CHECK: atomicrmw volatile fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

clang/test/SemaOpenCL/builtins-amdgcn-error-gfx9.cl

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,18 @@
33

44
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
55

6+
typedef int int2 __attribute__((ext_vector_type(2)));
7+
8+
struct S {
9+
int x;
10+
};
11+
612
void test_gfx9_fmed3h(global half *out, half a, half b, half c)
713
{
814
*out = __builtin_amdgcn_fmed3h(a, b, c); // expected-error {{'__builtin_amdgcn_fmed3h' needs target feature gfx9-insts}}
915
}
1016

11-
void test_mov_dpp(global int* out, int src, int i)
17+
void test_mov_dpp(global int* out, int src, int i, int2 i2, struct S s, float _Complex fc)
1218
{
1319
*out = __builtin_amdgcn_mov_dpp(src, i, 0, 0, false); // expected-error{{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
1420
*out = __builtin_amdgcn_mov_dpp(src, 0, i, 0, false); // expected-error{{argument to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
@@ -22,9 +28,12 @@ void test_mov_dpp(global int* out, int src, int i)
2228
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false, 1); // expected-error{{too many arguments to function call, expected at most 5, have 6}}
2329
*out = __builtin_amdgcn_mov_dpp(out, 0, 0, 0, false); // expected-error{{used type '__global int *__private' where integer or floating point type is required}}
2430
*out = __builtin_amdgcn_mov_dpp("aa", 0, 0, 0, false); // expected-error{{used type '__constant char[3]' where integer or floating point type is required}}
31+
*out = __builtin_amdgcn_mov_dpp(i2, 0, 0, 0, false); // expected-error{{used type '__private int2' (vector of 2 'int' values) where integer or floating point type is required}}
32+
*out = __builtin_amdgcn_mov_dpp(s, 0, 0, 0, false); // expected-error{{used type '__private struct S' where integer or floating point type is required}}
33+
*out = __builtin_amdgcn_mov_dpp(fc, 0, 0, 0, false); // expected-error{{used type '__private _Complex float' where integer or floating point type is required}}
2534
}
2635

27-
void test_update_dpp(global int* out, int arg1, int arg2, int i)
36+
void test_update_dpp(global int* out, int arg1, int arg2, int i, int2 i2, struct S s, float _Complex fc)
2837
{
2938
*out = __builtin_amdgcn_update_dpp(arg1, arg2, i, 0, 0, false); // expected-error{{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
3039
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, i, 0, false); // expected-error{{argument to '__builtin_amdgcn_update_dpp' must be a constant integer}}
@@ -40,4 +49,10 @@ void test_update_dpp(global int* out, int arg1, int arg2, int i)
4049
*out = __builtin_amdgcn_update_dpp(arg1, out, 0, 0, 0, false); // expected-error{{used type '__global int *__private' where integer or floating point type is required}}
4150
*out = __builtin_amdgcn_update_dpp("aa", arg2, 0, 0, 0, false); // expected-error{{used type '__constant char[3]' where integer or floating point type is required}}
4251
*out = __builtin_amdgcn_update_dpp(arg1, "aa", 0, 0, 0, false); // expected-error{{used type '__constant char[3]' where integer or floating point type is required}}
52+
*out = __builtin_amdgcn_update_dpp(i2, arg2, 0, 0, 0, false); // expected-error{{used type '__private int2' (vector of 2 'int' values) where integer or floating point type is required}}
53+
*out = __builtin_amdgcn_update_dpp(arg1, i2, 0, 0, 0, false); // expected-error{{used type '__private int2' (vector of 2 'int' values) where integer or floating point type is required}}
54+
*out = __builtin_amdgcn_update_dpp(s, arg2, 0, 0, 0, false); // expected-error{{used type '__private struct S' where integer or floating point type is required}}
55+
*out = __builtin_amdgcn_update_dpp(arg1, s, 0, 0, 0, false); // expected-error{{used type '__private struct S' where integer or floating point type is required}}
56+
*out = __builtin_amdgcn_update_dpp(fc, arg2, 0, 0, 0, false); // expected-error{{used type '__private _Complex float' where integer or floating point type is required}}
57+
*out = __builtin_amdgcn_update_dpp(arg1, fc, 0, 0, 0, false); // expected-error{{used type '__private _Complex float' where integer or floating point type is required}}
4358
}

0 commit comments

Comments
 (0)