From 4cc767987a658f4793d87687a98106a75730d02f Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 21 Feb 2025 02:36:29 +0300
Subject: [PATCH 1/6] ggml_compute_forward_concat() for arbitrary tensor type

---
 src/ggml-cpu/ggml-cpu.c | 143 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 142 insertions(+), 1 deletion(-)

diff --git a/src/ggml-cpu/ggml-cpu.c b/src/ggml-cpu/ggml-cpu.c
index fcbb5c233f..901f362400 100644
--- a/src/ggml-cpu/ggml-cpu.c
+++ b/src/ggml-cpu/ggml-cpu.c
@@ -6094,6 +6094,135 @@ static void ggml_compute_forward_repeat_back(
 
 // ggml_compute_forward_concat
 
+static void ggml_compute_forward_concat_any(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    const size_t len = src0->nb[0];
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    const int32_t dim = ggml_get_op_params_i32(dst, 0);
+
+    GGML_ASSERT(dim >= 0 && dim < 4);
+
+    int64_t o[4] = {0, 0, 0, 0};
+    o[dim] = src0->ne[dim];
+
+    const char * x;
+
+    // TODO: smarter multi-threading
+    for (int i3 = 0; i3 < ne3; i3++) {
+        for (int i2 = ith; i2 < ne2; i2 += nth) {
+            for (int i1 = 0; i1 < ne1; i1++) {
+                for (int i0 = 0; i0 < ne0; i0++) {
+                    if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
+                        x = (const char *)src0->data + (i0       )*nb00 + (i1       )*nb01 + (i2       )*nb02 + (i3       )*nb03;
+                    } else {
+                        x = (const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13;
+                    }
+
+                    char * y = (char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3;
+
+                    memcpy(y, x, len);
+                }
+            }
+        }
+    }
+}
+
+static void ggml_compute_forward_concat_i8(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    GGML_ASSERT(src0->nb[0] == sizeof(int8_t));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    const int32_t dim = ggml_get_op_params_i32(dst, 0);
+
+    GGML_ASSERT(dim >= 0 && dim < 4);
+
+    int64_t o[4] = {0, 0, 0, 0};
+    o[dim] = src0->ne[dim];
+
+    const int8_t * x;
+
+    // TODO: smarter multi-threading
+    for (int i3 = 0; i3 < ne3; i3++) {
+        for (int i2 = ith; i2 < ne2; i2 += nth) {
+            for (int i1 = 0; i1 < ne1; i1++) {
+                for (int i0 = 0; i0 < ne0; i0++) {
+                    if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
+                        x = (const int8_t *) ((const char *)src0->data + (i0       )*nb00 + (i1       )*nb01 + (i2       )*nb02 + (i3       )*nb03);
+                    } else {
+                        x = (const int8_t *) ((const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13);
+                    }
+
+                    int8_t * y = (int8_t *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+
+                    *y = *x;
+                }
+            }
+        }
+    }
+}
+
+static void ggml_compute_forward_concat_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    GGML_ASSERT(src0->nb[0] == sizeof(ggml_fp16_t));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    const int32_t dim = ggml_get_op_params_i32(dst, 0);
+
+    GGML_ASSERT(dim >= 0 && dim < 4);
+
+    int64_t o[4] = {0, 0, 0, 0};
+    o[dim] = src0->ne[dim];
+
+    const ggml_fp16_t * x;
+
+    // TODO: smarter multi-threading
+    for (int i3 = 0; i3 < ne3; i3++) {
+        for (int i2 = ith; i2 < ne2; i2 += nth) {
+            for (int i1 = 0; i1 < ne1; i1++) {
+                for (int i0 = 0; i0 < ne0; i0++) {
+                    if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
+                        x = (const ggml_fp16_t *) ((const char *)src0->data + (i0       )*nb00 + (i1       )*nb01 + (i2       )*nb02 + (i3       )*nb03);
+                    } else {
+                        x = (const ggml_fp16_t *) ((const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13);
+                    }
+
+                    ggml_fp16_t * y = (ggml_fp16_t *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+
+                    *y = *x;
+                }
+            }
+        }
+    }
+}
+
 static void ggml_compute_forward_concat_f32(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
@@ -6144,6 +6273,16 @@ static void ggml_compute_forward_concat(
     const struct ggml_tensor * src0 = dst->src[0];
 
     switch (src0->type) {
+        case GGML_TYPE_F16:
+        case GGML_TYPE_BF16:
+        case GGML_TYPE_I16:
+            {
+                ggml_compute_forward_concat_f16(params, dst);
+            } break;
+        case GGML_TYPE_I8:
+            {
+                ggml_compute_forward_concat_i8(params, dst);
+            } break;
         case GGML_TYPE_F32:
         case GGML_TYPE_I32:
             {
@@ -6151,7 +6290,9 @@ static void ggml_compute_forward_concat(
             } break;
         default:
             {
-                GGML_ABORT("fatal error");
+                //GGML_ABORT("fatal error");
+                //printf("bytes: %lu\n", src0->nb[0]);
+                ggml_compute_forward_concat_any(params, dst);
             }
     }
 }

From d02df79183e3cf37f2829c6d2b0dec2dd56fed25 Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 21 Feb 2025 13:01:38 +0300
Subject: [PATCH 2/6] Check that tensors' types match

---
 src/ggml-cpu/ggml-cpu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/ggml-cpu/ggml-cpu.c b/src/ggml-cpu/ggml-cpu.c
index 901f362400..d6b029c228 100644
--- a/src/ggml-cpu/ggml-cpu.c
+++ b/src/ggml-cpu/ggml-cpu.c
@@ -6101,7 +6101,8 @@ static void ggml_compute_forward_concat_any(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    const size_t len = src0->nb[0];
+    GGML_ASSERT(src0->type == dst->type);
+    const size_t len = ggml_type_size(src0->type);
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -6290,8 +6291,6 @@ static void ggml_compute_forward_concat(
             } break;
         default:
             {
-                //GGML_ABORT("fatal error");
-                //printf("bytes: %lu\n", src0->nb[0]);
                 ggml_compute_forward_concat_any(params, dst);
             }
     }

From 1478f333b4dc4f2874fd7b69d69c1eb0ce991c77 Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 21 Feb 2025 16:01:33 +0300
Subject: [PATCH 3/6] ggml-cpu.c: check type of source tensors

---
 src/ggml-cpu/ggml-cpu.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/ggml-cpu/ggml-cpu.c b/src/ggml-cpu/ggml-cpu.c
index d6b029c228..bdc0b8eb38 100644
--- a/src/ggml-cpu/ggml-cpu.c
+++ b/src/ggml-cpu/ggml-cpu.c
@@ -6101,7 +6101,7 @@ static void ggml_compute_forward_concat_any(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->type == dst->type);
+    GGML_ASSERT(src0->type == src1->type);
     const size_t len = ggml_type_size(src0->type);
 
     const int ith = params->ith;
@@ -6145,7 +6145,7 @@ static void ggml_compute_forward_concat_i8(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->nb[0] == sizeof(int8_t));
+    GGML_ASSERT(ggml_type_size(src0->type) == sizeof(int8_t));
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -6188,7 +6188,7 @@ static void ggml_compute_forward_concat_f16(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->nb[0] == sizeof(ggml_fp16_t));
+    GGML_ASSERT(ggml_type_size(src0->type) == sizeof(ggml_fp16_t));
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -6231,7 +6231,7 @@ static void ggml_compute_forward_concat_f32(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->nb[0] == sizeof(float));
+    GGML_ASSERT(ggml_type_size(src0->type) == sizeof(float));
 
     const int ith = params->ith;
     const int nth = params->nth;

From 5f4fde388d5479ed2f88c091ddc72c73962e18cb Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 21 Feb 2025 16:54:46 +0300
Subject: [PATCH 4/6] ggml-cpu.c: move tensor type check to ggml_compute_forward_concat()

---
 src/ggml-cpu/ggml-cpu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ggml-cpu/ggml-cpu.c b/src/ggml-cpu/ggml-cpu.c
index bdc0b8eb38..b2ce001ac7 100644
--- a/src/ggml-cpu/ggml-cpu.c
+++ b/src/ggml-cpu/ggml-cpu.c
@@ -6101,7 +6101,6 @@ static void ggml_compute_forward_concat_any(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    GGML_ASSERT(src0->type == src1->type);
     const size_t len = ggml_type_size(src0->type);
 
     const int ith = params->ith;
@@ -6273,6 +6272,8 @@ static void ggml_compute_forward_concat(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
+    GGML_ASSERT(src0->type == dst->src[1]->type);
+
     switch (src0->type) {
         case GGML_TYPE_F16:
         case GGML_TYPE_BF16:

From 9a3041d34c00b646f35e82124c3fc362e311e029 Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 21 Feb 2025 17:14:49 +0300
Subject: [PATCH 5/6] ggml.c: check concatenated tensor type

---
 src/ggml.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ggml.c b/src/ggml.c
index e9f3420c29..1f1561efaf 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -2328,6 +2328,7 @@ struct ggml_tensor * ggml_concat(
         struct ggml_tensor * b,
         int dim) {
     GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
+    GGML_ASSERT(a->type == b->type);
 
     int64_t ne[GGML_MAX_DIMS];
     for (int d = 0; d < GGML_MAX_DIMS; ++d) {

From e0832b7850fc73c21c94c89b1d271e0eb12f0c9c Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 21 Feb 2025 17:32:42 +0300
Subject: [PATCH 6/6] Remove tensor type check from ggml_compute_forward_concat() in ggml-cpu.c

..., as it was moved to ggml.c.
---
 src/ggml-cpu/ggml-cpu.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/ggml-cpu/ggml-cpu.c b/src/ggml-cpu/ggml-cpu.c
index b2ce001ac7..725fd6cde3 100644
--- a/src/ggml-cpu/ggml-cpu.c
+++ b/src/ggml-cpu/ggml-cpu.c
@@ -6272,8 +6272,6 @@ static void ggml_compute_forward_concat(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(src0->type == dst->src[1]->type);
-
     switch (src0->type) {
         case GGML_TYPE_F16:
         case GGML_TYPE_BF16: