From 4cc767987a658f4793d87687a98106a75730d02f Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 21 Feb 2025 02:36:29 +0300
Subject: [PATCH 1/6] ggml_compute_forward_concat() for arbitrary tensor type

---
 src/ggml-cpu/ggml-cpu.c | 143 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 142 insertions(+), 1 deletion(-)

diff --git a/src/ggml-cpu/ggml-cpu.c b/src/ggml-cpu/ggml-cpu.c
index fcbb5c233f..901f362400 100644
--- a/src/ggml-cpu/ggml-cpu.c
+++ b/src/ggml-cpu/ggml-cpu.c
@@ -6094,6 +6094,135 @@ static void ggml_compute_forward_repeat_back(
 
 // ggml_compute_forward_concat
 
+static void ggml_compute_forward_concat_any(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    const size_t len = src0->nb[0];
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    const int32_t dim = ggml_get_op_params_i32(dst, 0);
+
+    GGML_ASSERT(dim >= 0 && dim < 4);
+
+    int64_t o[4] = {0, 0, 0, 0};
+    o[dim] = src0->ne[dim];
+
+    const char * x;
+
+    // TODO: smarter multi-threading
+    for (int i3 = 0; i3 < ne3; i3++) {
+        for (int i2 = ith; i2 < ne2; i2 += nth) {
+            for (int i1 = 0; i1 < ne1; i1++) {
+                for (int i0 = 0; i0 < ne0; i0++) {
+                    if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
+                        x = (const char *)src0->data + (i0       )*nb00 + (i1       )*nb01 + (i2       )*nb02 + (i3       )*nb03;
+                    } else {
+                        x = (const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13;
+                    }
+
+                    char * y = (char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3;
+
+                    memcpy(y, x, len);
+                }
+            }
+        }
+    }
+}
+
+static void ggml_compute_forward_concat_i8(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    GGML_ASSERT(src0->nb[0] == sizeof(int8_t));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    const int32_t dim = ggml_get_op_params_i32(dst, 0);
+
+    GGML_ASSERT(dim >= 0 && dim < 4);
+
+    int64_t o[4] = {0, 0, 0, 0};
+    o[dim] = src0->ne[dim];
+
+    const int8_t * x;
+
+    // TODO: smarter multi-threading
+    for (int i3 = 0; i3 < ne3; i3++) {
+        for (int i2 = ith; i2 < ne2; i2 += nth) {
+            for (int i1 = 0; i1 < ne1; i1++) {
+                for (int i0 = 0; i0 < ne0; i0++) {
+                    if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
+                        x = (const int8_t *) ((const char *)src0->data + (i0       )*nb00 + (i1       )*nb01 + (i2       )*nb02 + (i3       )*nb03);
+                    } else {
+                        x = (const int8_t *) ((const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13);
+                    }
+
+                    int8_t * y = (int8_t *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+
+                    *y = *x;
+                }
+            }
+        }
+    }
+}
+
+static void ggml_compute_forward_concat_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    GGML_ASSERT(src0->nb[0] == sizeof(ggml_fp16_t));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    const int32_t dim = ggml_get_op_params_i32(dst, 0);
+
+    GGML_ASSERT(dim >= 0 && dim < 4);
+
+    int64_t o[4] = {0, 0, 0, 0};
+    o[dim] = src0->ne[dim];
+
+    const ggml_fp16_t * x;
+
+    // TODO: smarter multi-threading
+    for (int i3 = 0; i3 < ne3; i3++) {
+        for (int i2 = ith; i2 < ne2; i2 += nth) {
+            for (int i1 = 0; i1 < ne1; i1++) {
+                for (int i0 = 0; i0 < ne0; i0++) {
+                    if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
+                        x = (const ggml_fp16_t *) ((const char *)src0->data + (i0       )*nb00 + (i1       )*nb01 + (i2       )*nb02 + (i3       )*nb03);
+                    } else {
+                        x = (const ggml_fp16_t *) ((const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13);
+                    }
+
+                    ggml_fp16_t * y = (ggml_fp16_t *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+
+                    *y = *x;
+                }
+            }
+        }
+    }
+}
+
 static void ggml_compute_forward_concat_f32(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
@@ -6144,6 +6273,16 @@ static void ggml_compute_forward_concat(
     const struct ggml_tensor * src0 = dst->src[0];
 
     switch (src0->type) {
+        case GGML_TYPE_F16:
+        case GGML_TYPE_BF16:
+        case GGML_TYPE_I16:
+            {
+                ggml_compute_forward_concat_f16(params, dst);
+            } break;
+        case GGML_TYPE_I8:
+            {
+                ggml_compute_forward_concat_i8(params, dst);
+            } break;
         case GGML_TYPE_F32:
         case GGML_TYPE_I32:
             {
@@ -6151,7 +6290,9 @@ static void ggml_compute_forward_concat(
             } break;
         default:
             {
-                GGML_ABORT("fatal error");
+                //GGML_ABORT("fatal error");
+                //printf("bytes: %lu\n", src0->nb[0]);
+                ggml_compute_forward_concat_any(params, dst);
             }
     }
 }

From d02df79183e3cf37f2829c6d2b0dec2dd56fed25 Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 21 Feb 2025 13:01:38 +0300
Subject: [PATCH 2/6] Check that tensors' types match

---
 src/ggml-cpu/ggml-cpu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/ggml-cpu/ggml-cpu.c b/src/ggml-cpu/ggml-cpu.c
index 901f362400..d6b029c228 100644
--- a/src/ggml-cpu/ggml-cpu.c
+++ b/src/ggml-cpu/ggml-cpu.c
@@ -6101,7 +6101,8 @@ static void ggml_compute_forward_concat_any(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    const size_t len = src0->nb[0];
+    GGML_ASSERT(src0->type == dst->type);
+    const size_t len = ggml_type_size(src0->type);
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -6290,8 +6291,6 @@ static void ggml_compute_forward_concat(
             } break;
         default:
             {
-                //GGML_ABORT("fatal error");
-                //printf("bytes: %lu\n", src0->nb[0]);
                 ggml_compute_forward_concat_any(params, dst);
             }
     }

From 1478f333b4dc4f2874fd7b69d69c1eb0ce991c77 Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 21 Feb 2025 16:01:33 +0300
Subject: [PATCH 3/6] ggml-cpu.c: check type of source tensors

---
 src/ggml-cpu/ggml-cpu.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/ggml-cpu/ggml-cpu.c b/src/ggml-cpu/ggml-cpu.c
index d6b029c228..bdc0b8eb38 100644
--- a/src/ggml-cpu/ggml-cpu.c
+++ b/src/ggml-cpu/ggml-cpu.c
@@ -6101,7 +6101,7 @@ static void ggml_compute_forward_concat_any(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->type == dst->type);
+    GGML_ASSERT(src0->type == src1->type);
     const size_t len = ggml_type_size(src0->type);
 
     const int ith = params->ith;
@@ -6145,7 +6145,7 @@ static void ggml_compute_forward_concat_i8(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->nb[0] == sizeof(int8_t));
+    GGML_ASSERT(ggml_type_size(src0->type) == sizeof(int8_t));
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -6188,7 +6188,7 @@ static void ggml_compute_forward_concat_f16(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->nb[0] == sizeof(ggml_fp16_t));
+    GGML_ASSERT(ggml_type_size(src0->type) == sizeof(ggml_fp16_t));
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -6231,7 +6231,7 @@ static void ggml_compute_forward_concat_f32(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->nb[0] == sizeof(float));
+    GGML_ASSERT(ggml_type_size(src0->type) == sizeof(float));
 
     const int ith = params->ith;
     const int nth = params->nth;

From 5f4fde388d5479ed2f88c091ddc72c73962e18cb Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 21 Feb 2025 16:54:46 +0300
Subject: [PATCH 4/6] ggml-cpu.c: move tensor type check to ggml_compute_forward_concat()

---
 src/ggml-cpu/ggml-cpu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ggml-cpu/ggml-cpu.c b/src/ggml-cpu/ggml-cpu.c
index bdc0b8eb38..b2ce001ac7 100644
--- a/src/ggml-cpu/ggml-cpu.c
+++ b/src/ggml-cpu/ggml-cpu.c
@@ -6101,7 +6101,6 @@ static void ggml_compute_forward_concat_any(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->type == src1->type);
     const size_t len = ggml_type_size(src0->type);
 
     const int ith = params->ith;
@@ -6273,6 +6272,8 @@ static void ggml_compute_forward_concat(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
+    GGML_ASSERT(src0->type == dst->src[1]->type);
+
     switch (src0->type) {
         case GGML_TYPE_F16:
         case GGML_TYPE_BF16:

From 9a3041d34c00b646f35e82124c3fc362e311e029 Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 21 Feb 2025 17:14:49 +0300
Subject: [PATCH 5/6] ggml.c: check concatenated tensor type

---
 src/ggml.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ggml.c b/src/ggml.c
index e9f3420c29..1f1561efaf 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -2328,6 +2328,7 @@ struct ggml_tensor * ggml_concat(
         struct ggml_tensor * b,
         int dim) {
     GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
+    GGML_ASSERT(a->type == b->type);
 
     int64_t ne[GGML_MAX_DIMS];
     for (int d = 0; d < GGML_MAX_DIMS; ++d) {

From e0832b7850fc73c21c94c89b1d271e0eb12f0c9c Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 21 Feb 2025 17:32:42 +0300
Subject: [PATCH 6/6] Remove tensor type check from ggml_compute_forward_concat() in ggml-cpu.c

..., as it was moved to ggml.c.
---
 src/ggml-cpu/ggml-cpu.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/ggml-cpu/ggml-cpu.c b/src/ggml-cpu/ggml-cpu.c
index b2ce001ac7..725fd6cde3 100644
--- a/src/ggml-cpu/ggml-cpu.c
+++ b/src/ggml-cpu/ggml-cpu.c
@@ -6272,8 +6272,6 @@ static void ggml_compute_forward_concat(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(src0->type == dst->src[1]->type);
-
     switch (src0->type) {
         case GGML_TYPE_F16:
         case GGML_TYPE_BF16: