Skip to content

Commit 967037f

Browse files
committed
metal : migrate ggml-metal.m to ggml-metal.cpp
ggml-ci
1 parent 9aec524 commit 967037f

File tree

5 files changed

+192
-125
lines changed

5 files changed

+192
-125
lines changed

ggml/src/ggml-metal/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
55
message(STATUS "Metal framework found")
66

77
ggml_add_backend_library(ggml-metal
8-
ggml-metal.m
8+
ggml-metal.cpp
99
ggml-metal-device.m
1010
ggml-metal-device.cpp
1111
ggml-metal-common.cpp

ggml/src/ggml-metal/ggml-metal-context.m

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -466,11 +466,11 @@ ggml_metal_t ggml_metal_init(ggml_metal_device_t ctx_dev) {
466466

467467
res->ctx_dev = ctx_dev;
468468

469-
struct ggml_metal_device_props props_dev = ggml_metal_device_get_props(ctx_dev);
469+
const struct ggml_metal_device_props * props_dev = ggml_metal_device_get_props(ctx_dev);
470470

471471
res->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
472472

473-
res->use_bfloat = props_dev.has_bfloat;
473+
res->use_bfloat = props_dev->has_bfloat;
474474
res->use_fusion = getenv("GGML_METAL_FUSION_DISABLE") == nil;
475475
res->use_concurrency = getenv("GGML_METAL_CONCURRENCY_DISABLE") == nil;
476476

@@ -540,9 +540,9 @@ ggml_metal_t ggml_metal_init(ggml_metal_device_t ctx_dev) {
540540
GGML_LOG_WARN("%s: skipping %-40s (not supported)\n", __func__, "kernel_"#name); \
541541
}
542542

543-
const bool has_simdgroup_mm = props_dev.has_simdgroup_mm;
544-
const bool has_simdgroup_reduction = props_dev.has_simdgroup_reduction;
545-
const bool has_bfloat = props_dev.has_bfloat;
543+
const bool has_simdgroup_mm = props_dev->has_simdgroup_mm;
544+
const bool has_simdgroup_reduction = props_dev->has_simdgroup_reduction;
545+
const bool has_bfloat = props_dev->has_bfloat;
546546

547547
// simd_sum and simd_max require MTLGPUFamilyApple7
548548

@@ -1258,7 +1258,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
12581258
}
12591259
}
12601260

1261-
struct ggml_metal_device_props props_dev = ggml_metal_device_get_props(ctx->ctx_dev);
1261+
const struct ggml_metal_device_props * props_dev = ggml_metal_device_get_props(ctx->ctx_dev);
12621262

12631263
switch (dst->op) {
12641264
case GGML_OP_CONCAT:
@@ -2637,7 +2637,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
26372637
} else
26382638
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
26392639
// AMD GPUs and older A-chips will reuse the matrix-vector multiplication kernel
2640-
if (props_dev.supports_gpu_family_apple7 &&
2640+
if (props_dev->supports_gpu_family_apple7 &&
26412641
!ggml_is_transposed(src0) &&
26422642
!ggml_is_transposed(src1) &&
26432643
src1t == GGML_TYPE_F32 &&
@@ -2975,7 +2975,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
29752975

29762976
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
29772977
// AMD GPUs and older A-chips will reuse the matrix-vector multiplication kernel
2978-
if (props_dev.supports_gpu_family_apple7 &&
2978+
if (props_dev->supports_gpu_family_apple7 &&
29792979
ne00 % 32 == 0 && ne00 >= 64 &&
29802980
(ne21 >= ne21_mm_id_min)) {
29812981
GGML_ASSERT(ne00 % 4 == 0);
@@ -3024,7 +3024,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
30243024

30253025
const size_t smem = ne02*ne20*sizeof(uint16_t);
30263026

3027-
GGML_ASSERT(smem <= props_dev.max_theadgroup_memory_size);
3027+
GGML_ASSERT(smem <= props_dev->max_theadgroup_memory_size);
30283028

30293029
[encoder setComputePipelineState:pipeline];
30303030
[encoder setBytes:&args length:sizeof(args) atIndex:0];
@@ -4209,7 +4209,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
42094209
// nsgmax = 2;
42104210
// while (true) {
42114211
// const size_t smem = FATTN_SMEM(nsgmax);
4212-
// if (smem > props_dev.max_theadgroup_memory_size) {
4212+
// if (smem > props_dev->max_theadgroup_memory_size) {
42134213
// break;
42144214
// }
42154215
// nsgmax *= 2;
@@ -4277,8 +4277,8 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
42774277

42784278
[encoder setBuffer:id_dst offset:offs_dst atIndex:6];
42794279

4280-
//printf("smem: %zu, max: %zu, nsg = %d, ne02 = %d, ne12 = %d\n", smem, props_dev.max_theadgroup_memory_size, (int) nsg, ne02, ne12);
4281-
GGML_ASSERT(smem <= props_dev.max_theadgroup_memory_size);
4280+
//printf("smem: %zu, max: %zu, nsg = %d, ne02 = %d, ne12 = %d\n", smem, props_dev->max_theadgroup_memory_size, (int) nsg, ne02, ne12);
4281+
GGML_ASSERT(smem <= props_dev->max_theadgroup_memory_size);
42824282
[encoder setThreadgroupMemoryLength:smem atIndex:0];
42834283
[encoder dispatchThreadgroups:MTLSizeMake((ne01 + nqptg - 1)/nqptg, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(32, nsg, 1)];
42844284
#undef FATTN_SMEM
@@ -4305,7 +4305,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
43054305
while (true) {
43064306
const size_t smem = FATTN_SMEM(nsgmax);
43074307
// avoid using more than half of the threadgroup memory - this can cause slowdowns, especially for large head sizes
4308-
if (smem > props_dev.max_theadgroup_memory_size/2) {
4308+
if (smem > props_dev->max_theadgroup_memory_size/2) {
43094309
break;
43104310
}
43114311
nsgmax *= 2;
@@ -4394,8 +4394,8 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
43944394

43954395
const size_t smem = FATTN_SMEM(nsg);
43964396

4397-
//printf("smem: %zu, max: %zu, nsg = %d, nsgmax = %d\n", smem, props_dev.max_theadgroup_memory_size, (int) nsg, (int) nsgmax);
4398-
GGML_ASSERT(smem <= props_dev.max_theadgroup_memory_size);
4397+
//printf("smem: %zu, max: %zu, nsg = %d, nsgmax = %d\n", smem, props_dev->max_theadgroup_memory_size, (int) nsg, (int) nsgmax);
4398+
GGML_ASSERT(smem <= props_dev->max_theadgroup_memory_size);
43994399

44004400
if (nwg == 1) {
44014401
// using 1 workgroup -> write the result directly into dst

ggml/src/ggml-metal/ggml-metal-device.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ void * ggml_metal_device_get_queue (ggml_metal_device_t ctx); // id<MTLCommandQ
4242
void ggml_metal_device_get_memory(ggml_metal_device_t ctx, size_t * free, size_t * total);
4343
bool ggml_metal_device_supports_op(ggml_metal_device_t ctx, const struct ggml_tensor * op);
4444

45-
struct ggml_metal_device_props ggml_metal_device_get_props(ggml_metal_device_t ctx);
45+
const struct ggml_metal_device_props * ggml_metal_device_get_props(ggml_metal_device_t ctx);
4646

4747
//
4848
// device buffers

ggml/src/ggml-metal/ggml-metal-device.m

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -538,8 +538,8 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t ctx, const struct ggml_te
538538
}
539539
}
540540

541-
struct ggml_metal_device_props ggml_metal_device_get_props(ggml_metal_device_t ctx) {
542-
return ctx->props;
541+
const struct ggml_metal_device_props * ggml_metal_device_get_props(ggml_metal_device_t ctx) {
542+
return &ctx->props;
543543
}
544544

545545
//
@@ -686,9 +686,9 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t ctx, size_t size,
686686
size_aligned += (size_page - (size_aligned % size_page));
687687
}
688688

689-
const struct ggml_metal_device_props props_dev = ggml_metal_device_get_props(ctx);
689+
const struct ggml_metal_device_props * props_dev = ggml_metal_device_get_props(ctx);
690690

691-
shared = shared && props_dev.use_shared_buffers;
691+
shared = shared && props_dev->use_shared_buffers;
692692

693693
// allocate shared buffer if the device supports it and it is required by the buffer type
694694
if (shared) {
@@ -711,7 +711,7 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t ctx, size_t size,
711711
res->buffers[0].metal = nil;
712712

713713
if (size_aligned > 0) {
714-
if (props_dev.use_shared_buffers &&shared) {
714+
if (props_dev->use_shared_buffers &&shared) {
715715
res->buffers[0].metal = [res->device newBufferWithBytesNoCopy:res->all_data
716716
length:size_aligned
717717
options:MTLResourceStorageModeShared
@@ -732,7 +732,7 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t ctx, size_t size,
732732
return NULL;
733733
}
734734

735-
res->use_residency_sets = props_dev.use_residency_sets;
735+
res->use_residency_sets = props_dev->use_residency_sets;
736736

737737
if (!ggml_metal_buffer_rset_init(res)) {
738738
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
@@ -772,10 +772,10 @@ ggml_metal_buffer_t ggml_metal_buffer_map(ggml_metal_device_t ctx, void * ptr, s
772772
res->device = ggml_metal_device_get_device(ctx);
773773
res->queue = ggml_metal_device_get_queue (ctx);
774774

775-
const struct ggml_metal_device_props props_dev = ggml_metal_device_get_props(ctx);
775+
const struct ggml_metal_device_props * props_dev = ggml_metal_device_get_props(ctx);
776776

777777
// the buffer fits into the max buffer size allowed by the device
778-
if (size_aligned <= props_dev.max_buffer_size) {
778+
if (size_aligned <= props_dev->max_buffer_size) {
779779
res->buffers[res->n_buffers].data = ptr;
780780
res->buffers[res->n_buffers].size = size;
781781
res->buffers[res->n_buffers].metal = nil;
@@ -797,8 +797,8 @@ ggml_metal_buffer_t ggml_metal_buffer_map(ggml_metal_device_t ctx, void * ptr, s
797797
// this overlap between the views will guarantee that the tensor with the maximum size will fully fit into
798798
// one of the views
799799
const size_t size_ovlp = ((max_tensor_size + size_page - 1) / size_page + 1) * size_page; // round-up 2 pages just in case
800-
const size_t size_step = props_dev.max_buffer_size - size_ovlp;
801-
const size_t size_view = props_dev.max_buffer_size;
800+
const size_t size_step = props_dev->max_buffer_size - size_ovlp;
801+
const size_t size_view = props_dev->max_buffer_size;
802802

803803
for (size_t i = 0; i < size; i += size_step) {
804804
const size_t size_step_aligned = (i + size_view <= size) ? size_view : (size_aligned - i);
@@ -827,7 +827,7 @@ ggml_metal_buffer_t ggml_metal_buffer_map(ggml_metal_device_t ctx, void * ptr, s
827827
}
828828
}
829829

830-
res->use_residency_sets = props_dev.use_residency_sets;
830+
res->use_residency_sets = props_dev->use_residency_sets;
831831

832832
if (!ggml_metal_buffer_rset_init(res)) {
833833
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);

0 commit comments

Comments
 (0)