Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 3 additions & 55 deletions libclc/clc/lib/generic/geometric/clc_dot.cl
Original file line number Diff line number Diff line change
Expand Up @@ -7,59 +7,7 @@
//===----------------------------------------------------------------------===//

#include <clc/internal/clc.h>
#include <clc/math/clc_fma.h>

// Scalar float dot product: the product of the two operands.
_CLC_OVERLOAD _CLC_DEF float __clc_dot(float p0, float p1) {
  const float product = p0 * p1;
  return product;
}

// float2 dot product: sum of the products of matching components.
_CLC_OVERLOAD _CLC_DEF float __clc_dot(float2 p0, float2 p1) {
  return (p0.s0 * p1.s0) + (p0.s1 * p1.s1);
}

// float3 dot product: sum of the products of matching components,
// accumulated left to right.
_CLC_OVERLOAD _CLC_DEF float __clc_dot(float3 p0, float3 p1) {
  return (p0.s0 * p1.s0) + (p0.s1 * p1.s1) + (p0.s2 * p1.s2);
}

// float4 dot product: sum of the products of matching components,
// accumulated left to right.
_CLC_OVERLOAD _CLC_DEF float __clc_dot(float4 p0, float4 p1) {
  return (p0.s0 * p1.s0) + (p0.s1 * p1.s1) + (p0.s2 * p1.s2) +
         (p0.s3 * p1.s3);
}

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// Scalar double dot product: the product of the two operands.
_CLC_OVERLOAD _CLC_DEF double __clc_dot(double p0, double p1) {
  const double product = p0 * p1;
  return product;
}

// double2 dot product: sum of the products of matching components.
_CLC_OVERLOAD _CLC_DEF double __clc_dot(double2 p0, double2 p1) {
  return (p0.s0 * p1.s0) + (p0.s1 * p1.s1);
}

// double3 dot product: sum of the products of matching components,
// accumulated left to right.
_CLC_OVERLOAD _CLC_DEF double __clc_dot(double3 p0, double3 p1) {
  return (p0.s0 * p1.s0) + (p0.s1 * p1.s1) + (p0.s2 * p1.s2);
}

// double4 dot product: sum of the products of matching components,
// accumulated left to right.
_CLC_OVERLOAD _CLC_DEF double __clc_dot(double4 p0, double4 p1) {
  return (p0.s0 * p1.s0) + (p0.s1 * p1.s1) + (p0.s2 * p1.s2) +
         (p0.s3 * p1.s3);
}

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// Scalar half dot product: the product of the two operands.
_CLC_OVERLOAD _CLC_DEF half __clc_dot(half p0, half p1) {
  const half product = p0 * p1;
  return product;
}

// half2 dot product: sum of the products of matching components.
_CLC_OVERLOAD _CLC_DEF half __clc_dot(half2 p0, half2 p1) {
  return (p0.s0 * p1.s0) + (p0.s1 * p1.s1);
}

// half3 dot product: sum of the products of matching components,
// accumulated left to right.
_CLC_OVERLOAD _CLC_DEF half __clc_dot(half3 p0, half3 p1) {
  return (p0.s0 * p1.s0) + (p0.s1 * p1.s1) + (p0.s2 * p1.s2);
}

// half4 dot product: sum of the products of matching components,
// accumulated left to right.
_CLC_OVERLOAD _CLC_DEF half __clc_dot(half4 p0, half4 p1) {
  return (p0.s0 * p1.s0) + (p0.s1 * p1.s1) + (p0.s2 * p1.s2) +
         (p0.s3 * p1.s3);
}

#endif
#define __CLC_BODY <clc_dot.inc>
#include <clc/math/gentype.inc>
25 changes: 25 additions & 0 deletions libclc/clc/lib/generic/geometric/clc_dot.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Dot product for the gentype currently being expanded. A definition is
// emitted only when __CLC_VECSIZE_OR_1 is 1, 2, 3 or 4; for any other width
// this file contributes nothing.
#if (__CLC_VECSIZE_OR_1 >= 1 && __CLC_VECSIZE_OR_1 <= 4)

// Returns the sum of the products of matching components of x and y,
// accumulated left to right, as the scalar element type of the gentype.
_CLC_OVERLOAD _CLC_DEF __CLC_SCALAR_GENTYPE __clc_dot(__CLC_GENTYPE x,
                                                      __CLC_GENTYPE y) {
#if __CLC_VECSIZE_OR_1 == 4
  return (x.x * y.x) + (x.y * y.y) + (x.z * y.z) + (x.w * y.w);
#elif __CLC_VECSIZE_OR_1 == 3
  return (x.x * y.x) + (x.y * y.y) + (x.z * y.z);
#elif __CLC_VECSIZE_OR_1 == 2
  return (x.x * y.x) + (x.y * y.y);
#else
  // Width 1 (guaranteed by the enclosing guard): a single multiply.
  return x * y;
#endif
}

#endif
60 changes: 3 additions & 57 deletions libclc/opencl/lib/generic/geometric/dot.cl
Original file line number Diff line number Diff line change
Expand Up @@ -9,60 +9,6 @@
#include <clc/geometric/clc_dot.h>
#include <clc/opencl/clc.h>

// dot() for scalar float: forwards both operands to __clc_dot.
_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) {
  const float result = __clc_dot(p0, p1);
  return result;
}

// dot() for float2: forwards both operands to __clc_dot.
_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) {
  const float result = __clc_dot(p0, p1);
  return result;
}

// dot() for float3: forwards both operands to __clc_dot.
_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) {
  const float result = __clc_dot(p0, p1);
  return result;
}

// dot() for float4: forwards both operands to __clc_dot.
_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
  const float result = __clc_dot(p0, p1);
  return result;
}

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// dot() for scalar double: forwards both operands to __clc_dot.
_CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) {
  const double result = __clc_dot(p0, p1);
  return result;
}

// dot() for double2: forwards both operands to __clc_dot.
_CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) {
  const double result = __clc_dot(p0, p1);
  return result;
}

// dot() for double3: forwards both operands to __clc_dot.
_CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) {
  const double result = __clc_dot(p0, p1);
  return result;
}

// dot() for double4: forwards both operands to __clc_dot.
_CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
  const double result = __clc_dot(p0, p1);
  return result;
}

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// dot() for scalar half: forwards both operands to __clc_dot.
_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) {
  const half result = __clc_dot(p0, p1);
  return result;
}

// dot() for half2: forwards both operands to __clc_dot.
_CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) {
  const half result = __clc_dot(p0, p1);
  return result;
}

// dot() for half3: forwards both operands to __clc_dot.
_CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) {
  const half result = __clc_dot(p0, p1);
  return result;
}

// dot() for half4: forwards both operands to __clc_dot.
_CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) {
  const half result = __clc_dot(p0, p1);
  return result;
}

#endif
#define FUNCTION dot
#define __CLC_BODY <clc/geometric/binary_def.inc>
#include <clc/math/gentype.inc>
Loading