diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index 212e52ef6a1c8..c4824d145a54d 100644 --- a/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp @@ -8599,7 +8599,6 @@ static void ggml_compute_forward_timestep_embedding_f32( } if (dim % 2 != 0 && ith == 0) { embed_data[2 * half] = 0.f; - embed_data[dim] = 0.f; } } } diff --git a/ggml/src/ggml-cuda/tsembd.cu b/ggml/src/ggml-cuda/tsembd.cu index 153ddbcda92dc..b91a26fc80e61 100644 --- a/ggml/src/ggml-cuda/tsembd.cu +++ b/ggml/src/ggml-cuda/tsembd.cu @@ -7,11 +7,11 @@ static __global__ void timestep_embedding_f32(const float * timesteps, float * d int j = threadIdx.x + blockIdx.x * blockDim.x; float * embed_data = (float *)((char *)dst + i*nb1); - if (dim % 2 != 0 && j == ((dim + 1) / 2)) { - embed_data[dim] = 0.f; + int half = dim / 2; + if (dim % 2 != 0 && j == half) { + embed_data[2 * half] = 0.f; } - int half = dim / 2; if (j >= half) { return; } diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 4314c9cc9388a..5057e264f6090 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -4167,7 +4167,7 @@ kernel void kernel_timestep_embedding_f32( } if (args.dim % 2 != 0 && tpitg.x == 0) { - embed_data[args.dim] = 0.f; + embed_data[2 * half_] = 0.f; } } diff --git a/ggml/src/ggml-opencl/kernels/tsembd.cl b/ggml/src/ggml-opencl/kernels/tsembd.cl index 4b1107f70ba7a..21444bd958298 100644 --- a/ggml/src/ggml-opencl/kernels/tsembd.cl +++ b/ggml/src/ggml-opencl/kernels/tsembd.cl @@ -26,8 +26,8 @@ kernel void kernel_timestep_embedding( local_half_dim = logical_dim / 2; local_embed_data_ptr = (global float *)((global char *)local_dst_output_base_ptr + local_i * dst_nb1_bytes); - if (logical_dim % 2 != 0 && local_j == ((logical_dim + 1) / 2)) { - local_embed_data_ptr[logical_dim] = 0.0f; + if (logical_dim % 2 != 0 && local_j == local_half_dim) { + local_embed_data_ptr[2 * local_half_dim] = 0.0f; } if (local_j >= local_half_dim) { diff --git a/ggml/src/ggml-sycl/tsembd.cpp b/ggml/src/ggml-sycl/tsembd.cpp index f6ca626ea7a53..f2003794d3f55 100644 --- a/ggml/src/ggml-sycl/tsembd.cpp +++ b/ggml/src/ggml-sycl/tsembd.cpp @@ -21,11 +21,12 @@ static void timestep_embedding_f32( int j = item_ct1.get_local_id(2) + item_ct1.get_group(2) * item_ct1.get_local_range(2); float * embed_data = (float *)((char *)dst + i*nb1); - if (dim % 2 != 0 && j == ((dim + 1) / 2)) { - embed_data[dim] = 0.f; + int half = dim / 2; + + if (dim % 2 != 0 && j == half) { + embed_data[2 * half] = 0.f; } - int half = dim / 2; if (j >= half) { return; } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp b/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp index 79e065a9313aa..ce8e09442d9b6 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp @@ -24,11 +24,12 @@ void main() { const uint j = gl_GlobalInvocationID.x; const uint d_offset = i * p.nb1; - if (p.dim % 2 != 0 && j == ((p.dim + 1) / 2)) { - data_d[d_offset + p.dim] = 0.f; + const uint half_dim = p.dim / 2; + + if (p.dim % 2 != 0 && j == half_dim) { + data_d[d_offset + 2 * half_dim] = 0.f; } - const uint half_dim = p.dim / 2; if (j >= half_dim) { return; } diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 50dc1aa24fff5..3584827dca7fc 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -4923,12 +4923,8 @@ struct ggml_tensor * ggml_timestep_embedding( struct ggml_tensor * timesteps, int dim, int max_period) { - int actual_dim = dim; - if (dim % 2 != 0) { - actual_dim = dim + 1; - } - struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]); + struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]); ggml_set_op_params_i32(result, 0, dim); ggml_set_op_params_i32(result, 1, max_period);