Skip to content

Commit 1008c1f

Browse files
committed
small fixes
1 parent 4b5d450 commit 1008c1f

File tree

7 files changed

+33
-33
lines changed

7 files changed

+33
-33
lines changed

ggml/src/ggml-opencl/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ set(GGML_OPENCL_KERNELS
9898
concat
9999
tsembd
100100
upscale
101-
unary
101+
tanh
102102
pad
103103
repeat
104104
)

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -317,7 +317,7 @@ struct ggml_backend_opencl_context {
317317
cl_program program_sum_rows_f32;
318318
cl_program program_repeat;
319319
cl_program program_pad;
320-
cl_program program_unary;
320+
cl_program program_tanh;
321321
cl_program program_upscale;
322322
cl_program program_concat;
323323
cl_program program_tsembd;
@@ -1154,24 +1154,24 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
11541154
}
11551155
}
11561156

1157-
// unary (tanh)
1157+
// tanh
11581158
{
11591159
#ifdef GGML_OPENCL_EMBED_KERNELS
11601160
const std::string kernel_src {
1161-
#include "unary.cl.h"
1161+
#include "tanh.cl.h"
11621162
};
11631163
#else
1164-
const std::string kernel_src = read_file("unary.cl");
1164+
const std::string kernel_src = read_file("tanh.cl");
11651165
#endif
11661166
if (!kernel_src.empty()) {
1167-
backend_ctx->program_unary =
1167+
backend_ctx->program_tanh =
11681168
build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), compile_opts);
1169-
CL_CHECK((backend_ctx->kernel_tanh_f32_nd = clCreateKernel(backend_ctx->program_unary, "kernel_tanh_f32_nd", &err), err));
1170-
CL_CHECK((backend_ctx->kernel_tanh_f16_nd = clCreateKernel(backend_ctx->program_unary, "kernel_tanh_f16_nd", &err), err));
1169+
CL_CHECK((backend_ctx->kernel_tanh_f32_nd = clCreateKernel(backend_ctx->program_tanh, "kernel_tanh_f32_nd", &err), err));
1170+
CL_CHECK((backend_ctx->kernel_tanh_f16_nd = clCreateKernel(backend_ctx->program_tanh, "kernel_tanh_f16_nd", &err), err));
11711171
GGML_LOG_CONT(".");
11721172
} else {
1173-
GGML_LOG_WARN("ggml_opencl: unary kernel source not found or empty. Unary operations like tanh will not be available.\n");
1174-
backend_ctx->program_unary = nullptr;
1173+
GGML_LOG_WARN("ggml_opencl: tanh kernel source not found or empty. Tanh operation will not be available.\n");
1174+
backend_ctx->program_tanh = nullptr;
11751175
backend_ctx->kernel_tanh_f32_nd = nullptr;
11761176
backend_ctx->kernel_tanh_f16_nd = nullptr;
11771177
}
@@ -2150,7 +2150,7 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te
21502150
case GGML_OP_NORM:
21512151
case GGML_OP_RMS_NORM:
21522152
return true;
2153-
case GGML_OP_REPEAT:
2153+
case GGML_OP_REPEAT:
21542154
return op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32; // Assuming F32 for now, can be expanded
21552155
case GGML_OP_PAD:
21562156
return op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32 &&

ggml/src/ggml-opencl/kernels/concat.cl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -106,4 +106,4 @@ kernel void kernel_concat_f32_non_contiguous(
106106
y_val_ptr = (global float *)(dst_base + (ulong)current_i3*d_nb3 + (ulong)current_i2*d_nb2 + (ulong)current_i1*d_nb1 + (ulong)current_i0*d_nb0);
107107
*y_val_ptr = *x_val_ptr;
108108
}
109-
}
109+
}

ggml/src/ggml-opencl/kernels/repeat.cl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,4 +36,4 @@ kernel void kernel_repeat(
3636
current_dst_el_ptr[k] = current_src_el_ptr[k];
3737
}
3838
}
39-
}
39+
}

ggml/src/ggml-opencl/kernels/unary.cl renamed to ggml/src/ggml-opencl/kernels/tanh.cl

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ kernel void kernel_tanh_f32_nd(
3131

3232
ulong dst_offset_in_tensor = (ulong)i0*nb10 + (ulong)i1*nb11 + (ulong)i2*nb12 + (ulong)i3*nb13;
3333
global float *dst_val_ptr = (global float *)((global char *)p_dst_base + off_dst_abs + dst_offset_in_tensor);
34-
34+
3535
*dst_val_ptr = tanh(*src_val_ptr);
3636
}
3737
}
@@ -56,8 +56,8 @@ kernel void kernel_tanh_f16_nd(
5656

5757
ulong dst_offset_in_tensor = (ulong)i0*nb10 + (ulong)i1*nb11 + (ulong)i2*nb12 + (ulong)i3*nb13;
5858
global half *dst_val_ptr = (global half *)((global char *)p_dst_base + off_dst_abs + dst_offset_in_tensor);
59-
59+
6060
*dst_val_ptr = tanh(*src_val_ptr);
6161
}
6262
}
63-
}
63+
}

ggml/src/ggml-opencl/kernels/tsembd.cl

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@ kernel void kernel_timestep_embedding(
2020
local_timesteps_input_ptr = (global const float *)((global char *)p_timesteps + off_timesteps);
2121
local_dst_output_base_ptr = (global float *)((global char *)p_dst + off_dst);
2222

23-
local_i = get_global_id(1);
24-
local_j = get_global_id(0);
23+
local_i = get_global_id(1);
24+
local_j = get_global_id(0);
2525

2626
local_half_dim = logical_dim / 2;
2727
local_embed_data_ptr = (global float *)((global char *)local_dst_output_base_ptr + local_i * dst_nb1_bytes);
@@ -34,15 +34,15 @@ kernel void kernel_timestep_embedding(
3434
return;
3535
}
3636

37-
local_timestep_val = local_timesteps_input_ptr[local_i];
38-
39-
if (local_half_dim == 0) {
40-
local_freq = 1.0f;
37+
local_timestep_val = local_timesteps_input_ptr[local_i];
38+
39+
if (local_half_dim == 0) {
40+
local_freq = 1.0f;
4141
} else {
4242
local_freq = exp(-log((float)max_period) * (float)local_j / (float)local_half_dim);
4343
}
44-
44+
4545
local_arg = local_timestep_val * local_freq;
4646
local_embed_data_ptr[local_j] = cos(local_arg);
4747
local_embed_data_ptr[local_j + local_half_dim] = sin(local_arg);
48-
}
48+
}

ggml/src/ggml-opencl/kernels/upscale.cl

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -47,16 +47,16 @@ kernel void kernel_upscale_bilinear(
4747
ulong off_src0,
4848
global void * p_dst,
4949
ulong off_dst,
50-
ulong nb00,
51-
ulong nb01,
52-
ulong nb02,
53-
ulong nb03,
54-
int ne00_src,
55-
int ne01_src,
56-
int ne10_dst,
50+
ulong nb00,
51+
ulong nb01,
52+
ulong nb02,
53+
ulong nb03,
54+
int ne00_src,
55+
int ne01_src,
56+
int ne10_dst,
5757
int ne11_dst,
5858
int ne12_dst,
59-
int ne13_dst,
59+
int ne13_dst,
6060
float sf0,
6161
float sf1,
6262
float sf2,
@@ -118,4 +118,4 @@ kernel void kernel_upscale_bilinear(
118118
val_d * dx * dy;
119119

120120
dst_base[index] = result;
121-
}
121+
}

0 commit comments

Comments
 (0)