diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 03b8d16b76e0d..bff48f2e16524 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -20003,37 +20003,24 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
     assert(Error == ASTContext::GE_None && "Should not codegen an error");
     llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
-    unsigned Size = DataTy->getPrimitiveSizeInBits();
-    llvm::Type *IntTy =
-        llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
     Function *F =
         CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
                              ? Intrinsic::amdgcn_mov_dpp8
                              : Intrinsic::amdgcn_update_dpp,
-                         IntTy);
+                         DataTy);
     assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
            E->getNumArgs() == 2);
     bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
     if (InsertOld)
-      Args.push_back(llvm::PoisonValue::get(IntTy));
-    for (unsigned I = 0; I != E->getNumArgs(); ++I) {
+      Args.push_back(llvm::PoisonValue::get(DataTy));
+    Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, 0, E));
+    for (unsigned I = 1; I != E->getNumArgs(); ++I) {
       llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
-      if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
-          Size < 32) {
-        if (!DataTy->isIntegerTy())
-          V = Builder.CreateBitCast(
-              V, llvm::IntegerType::get(Builder.getContext(), Size));
-        V = Builder.CreateZExtOrBitCast(V, IntTy);
-      }
       llvm::Type *ExpTy =
           F->getFunctionType()->getFunctionParamType(I + InsertOld);
       Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
     }
-    Value *V = Builder.CreateCall(F, Args);
-    if (Size < 32 && !DataTy->isIntegerTy())
-      V = Builder.CreateTrunc(
-          V, llvm::IntegerType::get(Builder.getContext(), Size));
-    return Builder.CreateTruncOrBitCast(V, DataTy);
+    return Builder.CreateCall(F, Args);
   }
   case AMDGPU::BI__builtin_amdgcn_permlane16:
   case AMDGPU::BI__builtin_amdgcn_permlanex16:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
index a4054cba236dd..7e4ee6f4a942d 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
@@ -36,45 +36,37 @@ void test_mov_dpp8_long(global long* out, long a) {
 }
 
 // CHECK-LABEL: @test_mov_dpp8_float(
-// CHECK: %0 = bitcast float %a to i32
-// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
-// CHECK-NEXT: store i32 %1,
+// CHECK: %0 = tail call{{.*}} float @llvm.amdgcn.mov.dpp8.f32(float %a, i32 1)
+// CHECK-NEXT: store float %0,
 void test_mov_dpp8_float(global float* out, float a) {
   *out = __builtin_amdgcn_mov_dpp8(a, 1);
 }
 
 // CHECK-LABEL: @test_mov_dpp8_double
-// CHECK: %0 = bitcast double %x to i64
-// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.mov.dpp8.i64(i64 %0, i32 1)
-// CHECK-NEXT: store i64 %1,
+// CHECK: %0 = tail call{{.*}} double @llvm.amdgcn.mov.dpp8.f64(double %x, i32 1)
+// CHECK-NEXT: store double %0,
 void test_mov_dpp8_double(double x, global double *p) {
   *p = __builtin_amdgcn_mov_dpp8(x, 1);
 }
 
 // CHECK-LABEL: @test_mov_dpp8_short
-// CHECK: %0 = zext i16 %x to i32
-// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
-// CHECK-NEXT: %2 = trunc i32 %1 to i16
-// CHECK-NEXT: store i16 %2,
+// CHECK: %0 = tail call{{.*}} i16 @llvm.amdgcn.mov.dpp8.i16(i16 %x, i32 1)
+// CHECK-NEXT: store i16 %0,
 void test_mov_dpp8_short(short x, global short *p) {
   *p = __builtin_amdgcn_mov_dpp8(x, 1);
 }
 
 // CHECK-LABEL: @test_mov_dpp8_char
-// CHECK: %0 = zext i8 %x to i32
-// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %0, i32 1)
-// CHECK-NEXT: %2 = trunc i32 %1 to i8
-// CHECK-NEXT: store i8 %2,
+// CHECK: %0 = tail call{{.*}} i8 @llvm.amdgcn.mov.dpp8.i8(i8 %x, i32 1)
+// CHECK-NEXT: store i8 %0,
 void test_mov_dpp8_char(char x, global char *p) {
   *p = __builtin_amdgcn_mov_dpp8(x, 1);
 }
 
 // CHECK-LABEL: @test_mov_dpp8_half
-// CHECK: %0 = load i16,
-// CHECK: %1 = zext i16 %0 to i32
-// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %1, i32 1)
-// CHECK-NEXT: %3 = trunc i32 %2 to i16
-// CHECK-NEXT: store i16 %3,
+// CHECK: %0 = load half,
+// CHECK-NEXT: %1 = tail call{{.*}} half @llvm.amdgcn.mov.dpp8.f16(half %0, i32 1)
+// CHECK-NEXT: store half %1,
 void test_mov_dpp8_half(half *x, global half *p) {
   *p = __builtin_amdgcn_mov_dpp8(*x, 1);
 }
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 269f20e2f53fe..0c5995be5e098 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -117,45 +117,37 @@ void test_mov_dpp_long(long x, global long *p) {
 }
 
 // CHECK-LABEL: @test_mov_dpp_float
-// CHECK: %0 = bitcast float %x to i32
-// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
-// CHECK-NEXT: store i32 %1,
+// CHECK: %0 = tail call{{.*}} float @llvm.amdgcn.update.dpp.f32(float poison, float %x, i32 257, i32 15, i32 15, i1 false)
+// CHECK-NEXT: store float %0,
 void test_mov_dpp_float(float x, global float *p) {
   *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
 }
 
 // CHECK-LABEL: @test_mov_dpp_double
-// CHECK: %0 = bitcast double %x to i64
-// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %0, i32 257, i32 15, i32 15, i1 false)
-// CHECK-NEXT: store i64 %1,
+// CHECK: %0 = tail call{{.*}} double @llvm.amdgcn.update.dpp.f64(double poison, double %x, i32 257, i32 15, i32 15, i1 false)
+// CHECK-NEXT: store double %0,
 void test_mov_dpp_double(double x, global double *p) {
   *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
 }
 
 // CHECK-LABEL: @test_mov_dpp_short
-// CHECK: %0 = zext i16 %x to i32
-// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
-// CHECK-NEXT: %2 = trunc i32 %1 to i16
-// CHECK-NEXT: store i16 %2,
+// CHECK: %0 = tail call{{.*}} i16 @llvm.amdgcn.update.dpp.i16(i16 poison, i16 %x, i32 257, i32 15, i32 15, i1 false)
+// CHECK-NEXT: store i16 %0,
 void test_mov_dpp_short(short x, global short *p) {
   *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
 }
 
 // CHECK-LABEL: @test_mov_dpp_char
-// CHECK: %0 = zext i8 %x to i32
-// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
-// CHECK-NEXT: %2 = trunc i32 %1 to i8
-// CHECK-NEXT: store i8 %2,
+// CHECK: %0 = tail call{{.*}} i8 @llvm.amdgcn.update.dpp.i8(i8 poison, i8 %x, i32 257, i32 15, i32 15, i1 false)
+// CHECK-NEXT: store i8 %0,
 void test_mov_dpp_char(char x, global char *p) {
   *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
 }
 
 // CHECK-LABEL: @test_mov_dpp_half
-// CHECK: %0 = load i16,
-// CHECK: %1 = zext i16 %0 to i32
-// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %1, i32 257, i32 15, i32 15, i1 false)
-// CHECK-NEXT: %3 = trunc i32 %2 to i16
-// CHECK-NEXT: store i16 %3,
+// CHECK: %0 = load half,
+// CHECK-NEXT: %1 = tail call{{.*}} half @llvm.amdgcn.update.dpp.f16(half poison, half %0, i32 257, i32 15, i32 15, i1 false)
+// CHECK-NEXT: store half %1,
 void test_mov_dpp_half(half *x, global half *p) {
   *p = __builtin_amdgcn_mov_dpp(*x, 0x101, 0xf, 0xf, 0);
 }
@@ -175,45 +167,37 @@ void test_update_dpp_long(long x, global long *p) {
 }
 
 // CHECK-LABEL: @test_update_dpp_float
-// CHECK: %0 = bitcast float %x to i32
-// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
-// CHECK-NEXT: store i32 %1,
+// CHECK: %0 = tail call{{.*}} float @llvm.amdgcn.update.dpp.f32(float %x, float %x, i32 257, i32 15, i32 15, i1 false)
+// CHECK-NEXT: store float %0,
 void test_update_dpp_float(float x, global float *p) {
   *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
 }
 
 // CHECK-LABEL: @test_update_dpp_double
-// CHECK: %0 = bitcast double %x to i64
-// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %0, i64 %0, i32 257, i32 15, i32 15, i1 false)
-// CHECK-NEXT: store i64 %1,
+// CHECK: %0 = tail call{{.*}} double @llvm.amdgcn.update.dpp.f64(double %x, double %x, i32 257, i32 15, i32 15, i1 false)
+// CHECK-NEXT: store double %0,
 void test_update_dpp_double(double x, global double *p) {
   *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
 }
 
 // CHECK-LABEL: @test_update_dpp_short
-// CHECK: %0 = zext i16 %x to i32
-// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
-// CHECK-NEXT: %2 = trunc i32 %1 to i16
-// CHECK-NEXT: store i16 %2,
+// CHECK: %0 = tail call{{.*}} i16 @llvm.amdgcn.update.dpp.i16(i16 %x, i16 %x, i32 257, i32 15, i32 15, i1 false)
+// CHECK-NEXT: store i16 %0,
 void test_update_dpp_short(short x, global short *p) {
   *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
 }
 
 // CHECK-LABEL: @test_update_dpp_char
-// CHECK: %0 = zext i8 %x to i32
-// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
-// CHECK-NEXT: %2 = trunc i32 %1 to i8
-// CHECK-NEXT: store i8 %2,
+// CHECK: %0 = tail call{{.*}} i8 @llvm.amdgcn.update.dpp.i8(i8 %x, i8 %x, i32 257, i32 15, i32 15, i1 false)
+// CHECK-NEXT: store i8 %0,
 void test_update_dpp_char(char x, global char *p) {
   *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
 }
 
 // CHECK-LABEL: @test_update_dpp_half
-// CHECK: %0 = load i16,
-// CHECK: %1 = zext i16 %0 to i32
-// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %1, i32 257, i32 15, i32 15, i1 false)
-// CHECK-NEXT: %3 = trunc i32 %2 to i16
-// CHECK-NEXT: store i16 %3,
+// CHECK: %0 = load half,
+// CHECK-NEXT: %1 = tail call{{.*}} half @llvm.amdgcn.update.dpp.f16(half %0, half %0, i32 257, i32 15, i32 15, i1 false)
+// CHECK-NEXT: store half %1,
 void test_update_dpp_half(half *x, global half *p) {
   *p = __builtin_amdgcn_update_dpp(*x, *x, 0x101, 0xf, 0xf, 0);
 }
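
As an illustrative aside, not part of the patch: a minimal OpenCL sketch of the call pattern the updated tests exercise, assuming a gfx10+ target and the cl_khr_fp16 extension. The kernel name is hypothetical; only the __builtin_amdgcn_mov_dpp8 signature and the selector constant 1 come from the tests above. With the CGBuiltin.cpp change, the half-typed call is expected to lower directly to @llvm.amdgcn.mov.dpp8.f16 rather than a zext/call/trunc sequence around @llvm.amdgcn.mov.dpp8.i32.

  #pragma OPENCL EXTENSION cl_khr_fp16 : enable

  // Illustrative kernel: cross-lane move of half-precision values via DPP8.
  // Selector 1 is the same constant the tests above use.
  kernel void mov_dpp8_half_example(global half *out, global half *in) {
    size_t gid = get_global_id(0);
    half v = in[gid];
    out[gid] = __builtin_amdgcn_mov_dpp8(v, 1);
  }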