From 1dc94065039c44e82c31da027293cf7e44dc4210 Mon Sep 17 00:00:00 2001 From: Ilia Ilmer Date: Sun, 12 Oct 2025 15:36:52 -0400 Subject: [PATCH 1/9] initial: headers and metal-device.cpp updates --- ggml/src/ggml-metal/ggml-metal-device.cpp | 25 +++++++++++++++++++++++ ggml/src/ggml-metal/ggml-metal-device.h | 1 + ggml/src/ggml-metal/ggml-metal-device.m | 1 + ggml/src/ggml-metal/ggml-metal-ops.h | 1 + 4 files changed, 28 insertions(+) diff --git a/ggml/src/ggml-metal/ggml-metal-device.cpp b/ggml/src/ggml-metal/ggml-metal-device.cpp index e23abdda97405..3634c050fa479 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.cpp +++ b/ggml/src/ggml-metal/ggml-metal-device.cpp @@ -1387,6 +1387,31 @@ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_conv_transpose_1d(ggml_met return res; } +ggml_metal_pipeline_t ggml_metal_library_get_pipeline_conv_transpose_2d(ggml_metal_library_t lib, const ggml_tensor * op) { + assert(op->op == GGML_OP_CONV_TRANSPOSE_2D); + + GGML_ASSERT(ggml_is_contiguous(op->src[0])); + GGML_ASSERT(ggml_is_contiguous(op->src[1])); + GGML_ASSERT(op->src[0]->type == GGML_TYPE_F16 || op->src[0]->type == GGML_TYPE_F32); + GGML_ASSERT(op->src[1]->type == GGML_TYPE_F32); + GGML_ASSERT(op->type == GGML_TYPE_F32); + + char base[256]; + char name[256]; + + snprintf(base, 256, "kernel_conv_transpose_2d_%s_%s", ggml_type_name(op->src[0]->type), ggml_type_name(op->src[1]->type)); + snprintf(name, 256, "%s", base); + + ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name); + if (res) { + return res; + } + + res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr); + + return res; +} + ggml_metal_pipeline_t ggml_metal_library_get_pipeline_upscale(ggml_metal_library_t lib, const ggml_tensor * op) { assert(op->op == GGML_OP_UPSCALE); diff --git a/ggml/src/ggml-metal/ggml-metal-device.h b/ggml/src/ggml-metal/ggml-metal-device.h index 1034e4bbf6596..76af8be10b016 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.h +++ b/ggml/src/ggml-metal/ggml-metal-device.h @@ -129,6 +129,7 @@ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_norm (ggml_me ggml_metal_pipeline_t ggml_metal_library_get_pipeline_rope (ggml_metal_library_t lib, const struct ggml_tensor * op); ggml_metal_pipeline_t ggml_metal_library_get_pipeline_im2col (ggml_metal_library_t lib, const struct ggml_tensor * op); ggml_metal_pipeline_t ggml_metal_library_get_pipeline_conv_transpose_1d (ggml_metal_library_t lib, const struct ggml_tensor * op); +ggml_metal_pipeline_t ggml_metal_library_get_pipeline_conv_transpose_2d (ggml_metal_library_t lib, const struct ggml_tensor * op); ggml_metal_pipeline_t ggml_metal_library_get_pipeline_upscale (ggml_metal_library_t lib, const struct ggml_tensor * op); ggml_metal_pipeline_t ggml_metal_library_get_pipeline_pad (ggml_metal_library_t lib, const struct ggml_tensor * op); ggml_metal_pipeline_t ggml_metal_library_get_pipeline_pad_reflect_1d (ggml_metal_library_t lib, const struct ggml_tensor * op); diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m index 9527973015245..cacdbebf6535e 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.m +++ b/ggml/src/ggml-metal/ggml-metal-device.m @@ -647,6 +647,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te case GGML_OP_REPEAT: case GGML_OP_SCALE: case GGML_OP_CONV_TRANSPOSE_1D: + case GGML_OP_CONV_TRANSPOSE_2D: return true; case GGML_OP_CLAMP: return op->src[0]->type == GGML_TYPE_F32; diff --git a/ggml/src/ggml-metal/ggml-metal-ops.h b/ggml/src/ggml-metal/ggml-metal-ops.h index d4cb9446212d9..4ef826b7375a7 100644 --- a/ggml/src/ggml-metal/ggml-metal-ops.h +++ b/ggml/src/ggml-metal/ggml-metal-ops.h @@ -70,6 +70,7 @@ int ggml_metal_op_norm (ggml_metal_op_t ctx, int idx); int ggml_metal_op_rope (ggml_metal_op_t ctx, int idx); int ggml_metal_op_im2col (ggml_metal_op_t ctx, int idx); int ggml_metal_op_conv_transpose_1d (ggml_metal_op_t ctx, int idx); +int ggml_metal_op_conv_transpose_2d (ggml_metal_op_t ctx, int idx); int ggml_metal_op_upscale (ggml_metal_op_t ctx, int idx); int ggml_metal_op_pad (ggml_metal_op_t ctx, int idx); int ggml_metal_op_pad_reflect_1d (ggml_metal_op_t ctx, int idx); From 09661b92fd2ec2f9b155f16fe8f9373b13002e7b Mon Sep 17 00:00:00 2001 From: Ilia Ilmer Date: Sun, 12 Oct 2025 15:38:08 -0400 Subject: [PATCH 2/9] adding conv_transpose_2d --- ggml/src/ggml-metal/ggml-metal-impl.h | 13 +++++ ggml/src/ggml-metal/ggml-metal-ops.cpp | 56 +++++++++++++++++++ ggml/src/ggml-metal/ggml-metal.metal | 75 ++++++++++++++++++++++++++ 3 files changed, 144 insertions(+) diff --git a/ggml/src/ggml-metal/ggml-metal-impl.h b/ggml/src/ggml-metal/ggml-metal-impl.h index c9dff87305869..2f1f5fdbfcef5 100644 --- a/ggml/src/ggml-metal/ggml-metal-impl.h +++ b/ggml/src/ggml-metal/ggml-metal-impl.h @@ -513,6 +513,19 @@ typedef struct { uint64_t nb1; } ggml_metal_kargs_conv_transpose_1d; +typedef struct { + int32_t IC; + int32_t IH; + int32_t IW; + int32_t KH; + int32_t KW; + int32_t OC; + int32_t s0; + uint64_t nb0; + uint64_t nb1; + uint64_t nb2; +} ggml_metal_kargs_conv_transpose_2d; + typedef struct { uint64_t ofs0; uint64_t ofs1; diff --git a/ggml/src/ggml-metal/ggml-metal-ops.cpp b/ggml/src/ggml-metal/ggml-metal-ops.cpp index 5f9370449bb2d..38106e35ebe26 100644 --- a/ggml/src/ggml-metal/ggml-metal-ops.cpp +++ b/ggml/src/ggml-metal/ggml-metal-ops.cpp @@ -364,6 +364,10 @@ static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) { { n_fuse = ggml_metal_op_conv_transpose_1d(ctx, idx); } break; + case GGML_OP_CONV_TRANSPOSE_2D: + { + n_fuse = ggml_metal_op_conv_transpose_2d(ctx, idx); + } break; case GGML_OP_UPSCALE: { n_fuse = ggml_metal_op_upscale(ctx, idx); @@ -3068,6 +3072,58 @@ int ggml_metal_op_conv_transpose_1d(ggml_metal_op_t ctx, int idx) { return 1; } +int ggml_metal_op_conv_transpose_2d(ggml_metal_op_t ctx, int idx) { + ggml_tensor * op = ctx->node(idx); + + ggml_metal_library_t lib = ctx->lib; + ggml_metal_encoder_t enc = ctx->enc; + + GGML_TENSOR_LOCALS( int32_t, ne0, op->src[0], ne); + GGML_TENSOR_LOCALS(uint64_t, nb0, op->src[0], nb); + GGML_TENSOR_LOCALS( int32_t, ne1, op->src[1], ne); + GGML_TENSOR_LOCALS(uint64_t, nb1, op->src[1], nb); + GGML_TENSOR_LOCALS( int32_t, ne, op, ne); + GGML_TENSOR_LOCALS(uint32_t, nb, op, nb); + + const int32_t s0 = ((const int32_t *)(op->op_params))[0]; + + const int32_t IC = op->src[1]->ne[2]; + const int32_t IH = op->src[1]->ne[1]; + const int32_t IW = op->src[1]->ne[0]; + + const int32_t KH = op->src[0]->ne[1]; + const int32_t KW = op->src[0]->ne[0]; + + const int32_t OW = op->ne[0]; + const int32_t OH = op->ne[1]; + const int32_t OC = op->ne[2]; + + ggml_metal_kargs_conv_transpose_2d args = { + /*.IC =*/ IC, + /*.IH =*/ IH, + /*.IW =*/ IW, + /*.KH =*/ KH, + /*.KW =*/ KW, + /*.OC =*/ OC, + /*.s0 =*/ s0, + /*.nb0 =*/ nb0, + /*.nb1 =*/ nb1, + /*.nb2 =*/ nb2, + }; + + ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_conv_transpose_2d(lib, op); + + ggml_metal_encoder_set_pipeline(enc, pipeline); + ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0); + ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op->src[0]), 1); + ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op->src[1]), 2); + ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op), 3); + + ggml_metal_encoder_dispatch_threadgroups(enc, OW, OH, OC, 1, 1, 1); + + return 1; +} + int ggml_metal_op_upscale(ggml_metal_op_t ctx, int idx) { ggml_tensor * op = ctx->node(idx); diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index ddc285042d284..6e6566f961ed2 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -4131,6 +4131,81 @@ kernel void kernel_conv_transpose_1d( uint3 tgpig[[threadgroup_position_in_grid]], uint3 tgpg[[threadgroups_per_grid]]); + +typedef void (conv_transpose_2d_t)( + constant ggml_metal_kargs_conv_transpose_2d & args, + device const float * src0, + device const float * src1, + device char * dst, + uint3 tgpig[[threadgroup_position_in_grid]], + uint3 tgpg[[threadgroups_per_grid]]); + +template +kernel void kernel_conv_transpose_2d( + constant ggml_metal_kargs_conv_transpose_2d & args, + device const T * src0, + device const float * src1, + device char * dst, + uint3 tgpig[[threadgroup_position_in_grid]], + uint3 tgpg[[threadgroups_per_grid]]) { + + const int32_t out_x = tgpig[0]; + const int32_t out_y = tgpig[1]; + const int32_t out_c = tgpig[2]; + + float v = 0.0f; + + for (int32_t in_c = 0; in_c= args.IH) continue; + + for (int32_t kw = 0; kw= args.IW) continue; + + const int32_t input_idx = (args.IW * args.IH) * in_c + (args.IW) * in_y + in_x; + const int32_t kernel_idx = (args.KH * args.KW * args.OC) * in_c + (args.KH * args.KW) * out_c + (args.KW) * kh + kw; + + v += (float)src0[kernel_idx] * src1[input_idx]; + + } + } + } + device float * dst_ptr = (device float *) (dst + out_x*args.nb0 + out_y * args.nb1 + out_c*args.nb2); + + dst_ptr[0] = v; +} + +template [[host_name("kernel_conv_transpose_2d_f32_f32")]] +kernel void kernel_conv_transpose_2d( + constant ggml_metal_kargs_conv_transpose_2d & args, + device const float * src0, + device const float * src1, + device char * dst, + uint3 tgpig[[threadgroup_position_in_grid]], + uint3 tgpg[[threadgroups_per_grid]]); + +template [[host_name("kernel_conv_transpose_2d_f16_f32")]] +kernel void kernel_conv_transpose_2d( + constant ggml_metal_kargs_conv_transpose_2d & args, + device const half * src0, + device const float * src1, + device char * dst, + uint3 tgpig[[threadgroup_position_in_grid]], + uint3 tgpg[[threadgroups_per_grid]]); + kernel void kernel_upscale_f32( constant ggml_metal_kargs_upscale & args, device const char * src0, From 2f77e82be7e1fb51901d09b5dde85f646dc5e20a Mon Sep 17 00:00:00 2001 From: Ilia Ilmer Date: Sun, 12 Oct 2025 16:16:04 -0400 Subject: [PATCH 3/9] fix type --- ggml/src/ggml-metal/ggml-metal.metal | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 6e6566f961ed2..dacb32ef5b193 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -4155,10 +4155,10 @@ kernel void kernel_conv_transpose_2d( float v = 0.0f; - for (int32_t in_c = 0; in_c= args.IH) continue; - for (int32_t kw = 0; kw= args.IW) continue; - const int32_t input_idx = (args.IW * args.IH) * in_c + (args.IW) * in_y + in_x; - const int32_t kernel_idx = (args.KH * args.KW * args.OC) * in_c + (args.KH * args.KW) * out_c + (args.KW) * kh + kw; + const int64_t input_idx = (args.IW * args.IH) * in_c + (args.IW) * in_y + in_x; + const int64_t kernel_idx = (args.KH * args.KW * args.OC) * in_c + (args.KH * args.KW) * out_c + (args.KW) * kh + kw; v += (float)src0[kernel_idx] * src1[input_idx]; From a190a9dd468014babe836768ad1b4b7bd3c1967d Mon Sep 17 00:00:00 2001 From: Ilia Ilmer Date: Sun, 12 Oct 2025 16:22:04 -0400 Subject: [PATCH 4/9] fix type: int32->int64 --- ggml/src/ggml-metal/ggml-metal.metal | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index dacb32ef5b193..9b9321abcf7de 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -4149,9 +4149,9 @@ kernel void kernel_conv_transpose_2d( uint3 tgpig[[threadgroup_position_in_grid]], uint3 tgpg[[threadgroups_per_grid]]) { - const int32_t out_x = tgpig[0]; - const int32_t out_y = tgpig[1]; - const int32_t out_c = tgpig[2]; + const int64_t out_x = tgpig[0]; + const int64_t out_y = tgpig[1]; + const int64_t out_c = tgpig[2]; float v = 0.0f; From aa4b222e599a7e1451f6ee5d519aeeea67cd36ff Mon Sep 17 00:00:00 2001 From: Ilia Ilmer Date: Mon, 13 Oct 2025 19:57:48 -0400 Subject: [PATCH 5/9] Update ggml/src/ggml-metal/ggml-metal.metal Co-authored-by: Georgi Gerganov --- ggml/src/ggml-metal/ggml-metal.metal | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 9b9321abcf7de..8c4393c5cd00a 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -4155,8 +4155,8 @@ kernel void kernel_conv_transpose_2d( float v = 0.0f; - for (int64_t in_c = 0; in_c Date: Mon, 13 Oct 2025 19:57:57 -0400 Subject: [PATCH 6/9] Update ggml/src/ggml-metal/ggml-metal.metal Co-authored-by: Georgi Gerganov --- ggml/src/ggml-metal/ggml-metal.metal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 8c4393c5cd00a..65d842d7ee7f0 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -4169,7 +4169,7 @@ kernel void kernel_conv_transpose_2d( for (int64_t kw = 0; kw Date: Mon, 13 Oct 2025 19:58:06 -0400 Subject: [PATCH 7/9] Update ggml/src/ggml-metal/ggml-metal.metal Co-authored-by: Georgi Gerganov --- ggml/src/ggml-metal/ggml-metal.metal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 65d842d7ee7f0..14b3cdb0f9a21 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -4166,7 +4166,7 @@ kernel void kernel_conv_transpose_2d( if (in_y >= args.IH) continue; - for (int64_t kw = 0; kw Date: Mon, 13 Oct 2025 20:18:37 -0400 Subject: [PATCH 8/9] add checks for src[0] and src[1]; add type checks --- ggml/src/ggml-metal/ggml-metal-device.m | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m index cacdbebf6535e..9b9fdcfa29076 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.m +++ b/ggml/src/ggml-metal/ggml-metal-device.m @@ -647,8 +647,12 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te case GGML_OP_REPEAT: case GGML_OP_SCALE: case GGML_OP_CONV_TRANSPOSE_1D: - case GGML_OP_CONV_TRANSPOSE_2D: return true; + case GGML_OP_CONV_TRANSPOSE_2D: + return ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1]) && + (op->src[0]->type == GGML_TYPE_F16 || op->src[0]->type == GGML_TYPE_F32) && + op->src[1]->type == GGML_TYPE_F32 && + op->type == GGML_TYPE_F32; case GGML_OP_CLAMP: return op->src[0]->type == GGML_TYPE_F32; case GGML_OP_SQR: From 2f1ed3ccc41e73c3a026aa190f425bdd468a641c Mon Sep 17 00:00:00 2001 From: Ilia Ilmer Date: Tue, 14 Oct 2025 06:35:54 -0400 Subject: [PATCH 9/9] Update ggml-metal.metal Co-authored-by: Georgi Gerganov --- ggml/src/ggml-metal/ggml-metal.metal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 14b3cdb0f9a21..101ae4e4d38f3 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -4166,7 +4166,7 @@ kernel void kernel_conv_transpose_2d( if (in_y >= args.IH) continue; - for (int64_t kw = 0; kw