CodeLinaro
diff --git a/‎ggml/src/ggml-opencl/kernels/add.cl‎
Lines changed: 2 additions & 0 deletions b/‎ggml/src/ggml-opencl/kernels/add.cl‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎ggml/src/ggml-opencl/kernels/clamp.cl‎
Lines changed: 2 additions & 0 deletions b/‎ggml/src/ggml-opencl/kernels/clamp.cl‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎ggml/src/ggml-opencl/kernels/cpy.cl‎
Lines changed: 2 additions & 0 deletions b/‎ggml/src/ggml-opencl/kernels/cpy.cl‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎ggml/src/ggml-opencl/kernels/cvt.cl‎
Lines changed: 1 addition & 20 deletions b/‎ggml/src/ggml-opencl/kernels/cvt.cl‎
Lines changed: 1 addition & 20 deletions
diff --git a/‎ggml/src/ggml-opencl/kernels/diag_mask_inf.cl‎
Lines changed: 2 additions & 0 deletions b/‎ggml/src/ggml-opencl/kernels/diag_mask_inf.cl‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎ggml/src/ggml-opencl/kernels/gelu.cl‎
Lines changed: 2 additions & 0 deletions b/‎ggml/src/ggml-opencl/kernels/gelu.cl‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎ggml/src/ggml-opencl/kernels/get_rows.cl‎
Lines changed: 2 additions & 0 deletions b/‎ggml/src/ggml-opencl/kernels/get_rows.cl‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎ggml/src/ggml-opencl/kernels/im2col_f16.cl‎
Lines changed: 0 additions & 31 deletions b/‎ggml/src/ggml-opencl/kernels/im2col_f16.cl‎
Lines changed: 0 additions & 31 deletions
diff --git a/‎ggml/src/ggml-opencl/kernels/im2col_f32.cl‎
Lines changed: 0 additions & 30 deletions b/‎ggml/src/ggml-opencl/kernels/im2col_f32.cl‎
Lines changed: 0 additions & 30 deletions
diff --git a/‎ggml/src/ggml-opencl/kernels/mul.cl‎
Lines changed: 2 additions & 0 deletions b/‎ggml/src/ggml-opencl/kernels/mul.cl‎
Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 //------------------------------------------------------------------------------
 // add
 //------------------------------------------------------------------------------
 
@@ -1,3 +1,5 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 //------------------------------------------------------------------------------
 // clamp
 //------------------------------------------------------------------------------
 
@@ -1,3 +1,5 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 //------------------------------------------------------------------------------
 // cpy
 //------------------------------------------------------------------------------
 
@@ -1,39 +1,20 @@
 //------------------------------------------------------------------------------
-// This file is contains additional kernels for data conversion.
+// This file contains kernels for data conversion.
 // These kernels are used when loading the model, so their performance is less
 // important.
 //------------------------------------------------------------------------------
-#ifdef cl_khr_fp16
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
-#elif defined(cl_amd_fp16)
-#pragma OPENCL EXTENSION cl_amd_fp16 : enable
-#else
-#error "Half precision floating point not supportedby OpenCL implementation on your device."
-#endif
-
-#ifdef cl_khr_subgroups
-#pragma OPENCL EXTENSION cl_khr_subgroups : enable
-#elif defined(cl_intel_subgroups)
-#pragma OPENCL EXTENSION cl_intel_subgroups : enable
-#else
-#error "Subgroup not supported on your device."
-#endif
 
 #ifdef cl_intel_required_subgroup_size
-// Always use subgroup size of 32 on Intel.
 #pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable
 #define INTEL_GPU 1
 #define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16)))
 #define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32)))
 #elif defined(cl_qcom_reqd_sub_group_size)
-// Always use subgroups size of 64 on Adreno.
 #pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
 #define ADRENO_GPU 1
 #define REQD_SUBGROUP_SIZE_64  __attribute__((qcom_reqd_sub_group_size("half")))
 #define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full")))
-#else
-// TODO: do not know how to choose subgroup size on other GPUs.
-#error "Selecting subgroup size is not supported on your device."
 #endif
 
 #define QK4_0                   32
 
@@ -1,3 +1,5 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 //------------------------------------------------------------------------------
 // diag_mask_inf kernels
 //------------------------------------------------------------------------------
 
@@ -1,3 +1,5 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 //------------------------------------------------------------------------------
 // gelu
 //------------------------------------------------------------------------------
 
@@ -1,3 +1,5 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 typedef char int8_t;
 typedef uchar uint8_t;
 typedef short int16_t;
 
@@ -1,34 +1,4 @@
-#ifdef cl_khr_fp16
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
-#elif defined(cl_amd_fp16)
-#pragma OPENCL EXTENSION cl_amd_fp16 : enable
-#else
-#error "Half precision floating point not supportedby OpenCL implementation on your device."
-#endif
-
-#ifdef cl_khr_subgroups
-#pragma OPENCL EXTENSION cl_khr_subgroups : enable
-#elif defined(cl_intel_subgroups)
-#pragma OPENCL EXTENSION cl_intel_subgroups : enable
-#else
-#error "Subgroup not supported on your device."
-#endif
-
-#ifdef cl_intel_required_subgroup_size
-// Always use subgroup size of 32 on Intel.
-#pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable
-#define INTEL_GPU 1
-#define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16)))
-#define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32)))
-#elif defined(cl_qcom_reqd_sub_group_size)
-// Always use subgroups size of 64 on Adreno.
-#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
-#define ADRENO_GPU 1
-#define REQD_SUBGROUP_SIZE_64  __attribute__((qcom_reqd_sub_group_size("half")))
-#define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full")))
-#else
-#error "Selecting subgroup size is not supported on your device."
-#endif
 
 kernel void kernel_im2col_f16(
         global float * src1,
@@ -54,7 +24,6 @@ kernel void kernel_im2col_f16(
         int  d1
 ) {
     long i = get_global_id(0);
-
     if (i >= pelements) {
         return;
     }
 
@@ -1,34 +1,4 @@
-#ifdef cl_khr_fp16
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
-#elif defined(cl_amd_fp16)
-#pragma OPENCL EXTENSION cl_amd_fp16 : enable
-#else
-#error "Half precision floating point not supportedby OpenCL implementation on your device."
-#endif
-
-#ifdef cl_khr_subgroups
-#pragma OPENCL EXTENSION cl_khr_subgroups : enable
-#elif defined(cl_intel_subgroups)
-#pragma OPENCL EXTENSION cl_intel_subgroups : enable
-#else
-#error "Subgroup not supported on your device."
-#endif
-
-#ifdef cl_intel_required_subgroup_size
-// Always use subgroup size of 32 on Intel.
-#pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable
-#define INTEL_GPU 1
-#define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16)))
-#define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32)))
-#elif defined(cl_qcom_reqd_sub_group_size)
-// Always use subgroups size of 64 on Adreno.
-#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
-#define ADRENO_GPU 1
-#define REQD_SUBGROUP_SIZE_64  __attribute__((qcom_reqd_sub_group_size("half")))
-#define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full")))
-#else
-#error "Selecting subgroup size is not supported on your device."
-#endif
 
 kernel void kernel_im2col_f32(
         global float * src1,
 
@@ -1,3 +1,5 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 //------------------------------------------------------------------------------
 // mul
 //------------------------------------------------------------------------------
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+#pragma OPENCL EXTENSION cl_khr_fp16 : enable`
	`2`	`+`
`1`	`3`	`//------------------------------------------------------------------------------`
`2`	`4`	`// add`
`3`	`5`	`//------------------------------------------------------------------------------`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+#pragma OPENCL EXTENSION cl_khr_fp16 : enable`
	`2`	`+`
`1`	`3`	`typedef char int8_t;`
`2`	`4`	`typedef uchar uint8_t;`
`3`	`5`	`typedef short int16_t;`