Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,13 @@ BUILTIN(__builtin_amdgcn_frexp_exp, "id", "nc")
BUILTIN(__builtin_amdgcn_frexp_expf, "if", "nc")
BUILTIN(__builtin_amdgcn_fract, "dd", "nc")
BUILTIN(__builtin_amdgcn_fractf, "ff", "nc")
BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc")
TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "lerp-inst")
BUILTIN(__builtin_amdgcn_class, "bdi", "nc")
BUILTIN(__builtin_amdgcn_classf, "bfi", "nc")
BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc")
BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc")
BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc")
BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc")
TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "cube-insts")
TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "cube-insts")
TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "cube-insts")
TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "cube-insts")
BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
Expand All @@ -149,17 +149,17 @@ BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc")
BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc")
BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "cvt-pknorm-vop2-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "cvt-pknorm-vop2-insts")
BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc")
BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc")
TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "sad-insts")
BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I'd just swap this two to make them align.

BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "sad-insts")
TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "sad-insts")
TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "qsad-insts")
BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")

Expand Down
86 changes: 86 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu fiji -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK %s


#pragma OPENCL EXTENSION cl_khr_fp64 : enable

typedef unsigned long ulong;
typedef unsigned int uint;
typedef unsigned short ushort;
typedef half __attribute__((ext_vector_type(2))) half2;
typedef short __attribute__((ext_vector_type(2))) short2;
typedef ushort __attribute__((ext_vector_type(2))) ushort2;
typedef uint __attribute__((ext_vector_type(4))) uint4;

// CHECK-LABEL: @test_lerp
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.lerp
void test_lerp(global int* out, int a, int b, int c)
{
*out = __builtin_amdgcn_lerp(a, b, c);
}

// CHECK-LABEL: @test_cubeid(
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
void test_cubeid(global float* out, float a, float b, float c) {
*out = __builtin_amdgcn_cubeid(a, b, c);
}

// CHECK-LABEL: @test_cubesc(
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
void test_cubesc(global float* out, float a, float b, float c) {
*out = __builtin_amdgcn_cubesc(a, b, c);
}

// CHECK-LABEL: @test_cubetc(
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
void test_cubetc(global float* out, float a, float b, float c) {
*out = __builtin_amdgcn_cubetc(a, b, c);
}

// CHECK-LABEL: @test_cubema(
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubema(float %a, float %b, float %c)
void test_cubema(global float* out, float a, float b, float c) {
*out = __builtin_amdgcn_cubema(a, b, c);
}

// CHECK-LABEL: @test_cvt_pknorm_i16(
// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1)
kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) {
*out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1);
}

// CHECK-LABEL: @test_cvt_pknorm_u16(
// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1)
kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) {
*out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1);
}

// CHECK-LABEL: @test_sad_u8(
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2)
kernel void test_sad_u8(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_sad_u8(src0, src1, src2);
}

// CHECK-LABEL: test_msad_u8(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.msad.u8(i32 %src0, i32 %src1, i32 %src2)
kernel void test_msad_u8(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_msad_u8(src0, src1, src2);
}

// CHECK-LABEL: test_sad_hi_u8(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.hi.u8(i32 %src0, i32 %src1, i32 %src2)
kernel void test_sad_hi_u8(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_sad_hi_u8(src0, src1, src2);
}

// CHECK-LABEL: @test_sad_u16(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.u16(i32 %src0, i32 %src1, i32 %src2)
kernel void test_sad_u16(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_sad_u16(src0, src1, src2);
}

// CHECK-LABEL: @test_qsad_pk_u16_u8(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2)
kernel void test_qsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) {
*out = __builtin_amdgcn_qsad_pk_u16_u8(src0, src1, src2);
}
67 changes: 0 additions & 67 deletions clang/test/CodeGenOpenCL/builtins-amdgcn.cl
Original file line number Diff line number Diff line change
Expand Up @@ -251,13 +251,6 @@ void test_fract_f64(global int* out, double a)
*out = __builtin_amdgcn_fract(a);
}

// CHECK-LABEL: @test_lerp
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.lerp
void test_lerp(global int* out, int a, int b, int c)
{
*out = __builtin_amdgcn_lerp(a, b, c);
}

// CHECK-LABEL: @test_sicmp_i32
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
void test_sicmp_i32(global ulong* out, int a, int b)
Expand Down Expand Up @@ -865,30 +858,6 @@ void test_s_setprio()
__builtin_amdgcn_s_setprio(3);
}

// CHECK-LABEL: @test_cubeid(
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
void test_cubeid(global float* out, float a, float b, float c) {
*out = __builtin_amdgcn_cubeid(a, b, c);
}

// CHECK-LABEL: @test_cubesc(
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
void test_cubesc(global float* out, float a, float b, float c) {
*out = __builtin_amdgcn_cubesc(a, b, c);
}

// CHECK-LABEL: @test_cubetc(
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
void test_cubetc(global float* out, float a, float b, float c) {
*out = __builtin_amdgcn_cubetc(a, b, c);
}

// CHECK-LABEL: @test_cubema(
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubema(float %a, float %b, float %c)
void test_cubema(global float* out, float a, float b, float c) {
*out = __builtin_amdgcn_cubema(a, b, c);
}

// CHECK-LABEL: @test_read_exec(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.ballot.i64(i1 true)
void test_read_exec(global ulong* out) {
Expand Down Expand Up @@ -1139,18 +1108,6 @@ kernel void test_cvt_pkrtz(global half2* out, float src0, float src1) {
*out = __builtin_amdgcn_cvt_pkrtz(src0, src1);
}

// CHECK-LABEL: @test_cvt_pknorm_i16(
// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1)
kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) {
*out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1);
}

// CHECK-LABEL: @test_cvt_pknorm_u16(
// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1)
kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) {
*out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1);
}

// CHECK-LABEL: @test_cvt_pk_i16(
// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %src0, i32 %src1)
kernel void test_cvt_pk_i16(global short2* out, int src0, int src1) {
Expand All @@ -1169,36 +1126,12 @@ kernel void test_cvt_pk_u8_f32(global uint* out, float src0, uint src1, uint src
*out = __builtin_amdgcn_cvt_pk_u8_f32(src0, src1, src2);
}

// CHECK-LABEL: @test_sad_u8(
// CHECK: tail call{{.*}} i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2)
kernel void test_sad_u8(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_sad_u8(src0, src1, src2);
}

// CHECK-LABEL: test_msad_u8(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.msad.u8(i32 %src0, i32 %src1, i32 %src2)
kernel void test_msad_u8(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_msad_u8(src0, src1, src2);
}

// CHECK-LABEL: test_sad_hi_u8(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.hi.u8(i32 %src0, i32 %src1, i32 %src2)
kernel void test_sad_hi_u8(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_sad_hi_u8(src0, src1, src2);
}

// CHECK-LABEL: @test_sad_u16(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.u16(i32 %src0, i32 %src1, i32 %src2)
kernel void test_sad_u16(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_sad_u16(src0, src1, src2);
}

// CHECK-LABEL: @test_qsad_pk_u16_u8(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2)
kernel void test_qsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) {
*out = __builtin_amdgcn_qsad_pk_u16_u8(src0, src1, src2);
}

// CHECK-LABEL: @test_mqsad_pk_u16_u8(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2)
kernel void test_mqsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) {
Expand Down
Loading
Loading