Skip to content

Commit a4a0c24

Browse files
committed
opencl: cleanup preprocessor for kernels
1 parent e4ab469 commit a4a0c24

31 files changed

+60
-315
lines changed

ggml/src/ggml-opencl/kernels/add.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2+
13
//------------------------------------------------------------------------------
24
// add
35
//------------------------------------------------------------------------------

ggml/src/ggml-opencl/kernels/clamp.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2+
13
//------------------------------------------------------------------------------
24
// clamp
35
//------------------------------------------------------------------------------

ggml/src/ggml-opencl/kernels/cpy.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2+
13
//------------------------------------------------------------------------------
24
// cpy
35
//------------------------------------------------------------------------------

ggml/src/ggml-opencl/kernels/cvt.cl

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,20 @@
11
//------------------------------------------------------------------------------
2-
// This file is contains additional kernels for data conversion.
2+
// This file contains kernels for data conversion.
33
// These kernels are used when loading the model, so their performance is less
44
// important.
55
//------------------------------------------------------------------------------
6-
#ifdef cl_khr_fp16
76
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
8-
#elif defined(cl_amd_fp16)
9-
#pragma OPENCL EXTENSION cl_amd_fp16 : enable
10-
#else
11-
#error "Half precision floating point not supportedby OpenCL implementation on your device."
12-
#endif
13-
14-
#ifdef cl_khr_subgroups
15-
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
16-
#elif defined(cl_intel_subgroups)
17-
#pragma OPENCL EXTENSION cl_intel_subgroups : enable
18-
#else
19-
#error "Subgroup not supported on your device."
20-
#endif
217

228
#ifdef cl_intel_required_subgroup_size
23-
// Always use subgroup size of 32 on Intel.
249
#pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable
2510
#define INTEL_GPU 1
2611
#define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16)))
2712
#define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32)))
2813
#elif defined(cl_qcom_reqd_sub_group_size)
29-
// Always use subgroups size of 64 on Adreno.
3014
#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
3115
#define ADRENO_GPU 1
3216
#define REQD_SUBGROUP_SIZE_64 __attribute__((qcom_reqd_sub_group_size("half")))
3317
#define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full")))
34-
#else
35-
// TODO: do not know how to choose subgroup size on other GPUs.
36-
#error "Selecting subgroup size is not supported on your device."
3718
#endif
3819

3920
#define QK4_0 32

ggml/src/ggml-opencl/kernels/diag_mask_inf.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2+
13
//------------------------------------------------------------------------------
24
// diag_mask_inf kernels
35
//------------------------------------------------------------------------------

ggml/src/ggml-opencl/kernels/gelu.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2+
13
//------------------------------------------------------------------------------
24
// gelu
35
//------------------------------------------------------------------------------

ggml/src/ggml-opencl/kernels/get_rows.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2+
13
typedef char int8_t;
24
typedef uchar uint8_t;
35
typedef short int16_t;

ggml/src/ggml-opencl/kernels/im2col_f16.cl

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,4 @@
1-
#ifdef cl_khr_fp16
21
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
3-
#elif defined(cl_amd_fp16)
4-
#pragma OPENCL EXTENSION cl_amd_fp16 : enable
5-
#else
6-
#error "Half precision floating point not supportedby OpenCL implementation on your device."
7-
#endif
8-
9-
#ifdef cl_khr_subgroups
10-
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
11-
#elif defined(cl_intel_subgroups)
12-
#pragma OPENCL EXTENSION cl_intel_subgroups : enable
13-
#else
14-
#error "Subgroup not supported on your device."
15-
#endif
16-
17-
#ifdef cl_intel_required_subgroup_size
18-
// Always use subgroup size of 32 on Intel.
19-
#pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable
20-
#define INTEL_GPU 1
21-
#define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16)))
22-
#define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32)))
23-
#elif defined(cl_qcom_reqd_sub_group_size)
24-
// Always use subgroups size of 64 on Adreno.
25-
#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
26-
#define ADRENO_GPU 1
27-
#define REQD_SUBGROUP_SIZE_64 __attribute__((qcom_reqd_sub_group_size("half")))
28-
#define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full")))
29-
#else
30-
#error "Selecting subgroup size is not supported on your device."
31-
#endif
322

333
kernel void kernel_im2col_f16(
344
global float * src1,
@@ -54,7 +24,6 @@ kernel void kernel_im2col_f16(
5424
int d1
5525
) {
5626
long i = get_global_id(0);
57-
5827
if (i >= pelements) {
5928
return;
6029
}

ggml/src/ggml-opencl/kernels/im2col_f32.cl

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,4 @@
1-
#ifdef cl_khr_fp16
21
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
3-
#elif defined(cl_amd_fp16)
4-
#pragma OPENCL EXTENSION cl_amd_fp16 : enable
5-
#else
6-
#error "Half precision floating point not supportedby OpenCL implementation on your device."
7-
#endif
8-
9-
#ifdef cl_khr_subgroups
10-
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
11-
#elif defined(cl_intel_subgroups)
12-
#pragma OPENCL EXTENSION cl_intel_subgroups : enable
13-
#else
14-
#error "Subgroup not supported on your device."
15-
#endif
16-
17-
#ifdef cl_intel_required_subgroup_size
18-
// Always use subgroup size of 32 on Intel.
19-
#pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable
20-
#define INTEL_GPU 1
21-
#define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16)))
22-
#define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32)))
23-
#elif defined(cl_qcom_reqd_sub_group_size)
24-
// Always use subgroups size of 64 on Adreno.
25-
#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
26-
#define ADRENO_GPU 1
27-
#define REQD_SUBGROUP_SIZE_64 __attribute__((qcom_reqd_sub_group_size("half")))
28-
#define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full")))
29-
#else
30-
#error "Selecting subgroup size is not supported on your device."
31-
#endif
322

333
kernel void kernel_im2col_f32(
344
global float * src1,

ggml/src/ggml-opencl/kernels/mul.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2+
13
//------------------------------------------------------------------------------
24
// mul
35
//------------------------------------------------------------------------------

0 commit comments

Comments
 (0)