Skip to content

Commit 508d065

Browse files
committed
cont : props
ggml-ci
1 parent c2e5dd5 commit 508d065

File tree

1 file changed

+50
-55
lines changed

1 file changed

+50
-55
lines changed

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 50 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -371,6 +371,8 @@ - (void) dealloc {
371371
id<MTLLibrary> library;
372372
id<MTLCommandQueue> queue; // currently a pointer to the device queue, but might become separate queue [TAG_QUEUE_PER_BACKEND]
373373

374+
struct ggml_backend_metal_device_props props_dev;
375+
374376
dispatch_queue_t d_queue;
375377

376378
// the set of pre-compiled kernels for this context
@@ -481,6 +483,8 @@ @implementation GGMLMetalClass
481483
return NULL;
482484
}
483485

486+
ctx->props_dev = ggml_backend_metal_device_get_props(dev->context);
487+
484488
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
485489

486490
ctx->use_fusion = getenv("GGML_METAL_FUSION_DISABLE") == nil;
@@ -496,8 +500,6 @@ @implementation GGMLMetalClass
496500
ctx->debug_fusion = val ? atoi(val) : 0;
497501
}
498502

499-
struct ggml_backend_metal_device_props props = ggml_backend_metal_device_get_props(dev->context);
500-
501503
ctx->use_graph_optimize = true;
502504

503505
if (getenv("GGML_METAL_GRAPH_OPTIMIZE_DISABLE") != NULL) {
@@ -553,9 +555,9 @@ @implementation GGMLMetalClass
553555
GGML_LOG_WARN("%s: skipping %-40s (not supported)\n", __func__, "kernel_"#name); \
554556
}
555557

556-
const bool has_simdgroup_mm = props.has_simdgroup_mm;
557-
const bool has_simdgroup_reduction = props.has_simdgroup_reduction;
558-
const bool use_bfloat = props.use_bfloat;
558+
const bool has_simdgroup_mm = ctx->props_dev.has_simdgroup_mm;
559+
const bool has_simdgroup_reduction = ctx->props_dev.has_simdgroup_reduction;
560+
const bool use_bfloat = ctx->props_dev.use_bfloat;
559561

560562
// simd_sum and simd_max requires MTLGPUFamilyApple7
561563

@@ -1213,6 +1215,12 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
12131215
}
12141216
}
12151217

1218+
for (int i = 0; i < (int) ctx->cmd_bufs_ext.count; ++i) {
1219+
if (ctx->cmd_bufs_ext[i]) {
1220+
[ctx->cmd_bufs_ext[i] release];
1221+
}
1222+
}
1223+
12161224
[ctx->cmd_bufs_ext removeAllObjects];
12171225
[ctx->cmd_bufs_ext release];
12181226

@@ -1337,12 +1345,10 @@ static void ggml_backend_metal_buffer_rset_free(struct ggml_backend_metal_buffer
13371345
return nil;
13381346
}
13391347

1340-
static bool ggml_metal_supports_op(ggml_backend_metal_device_t ctx_dev, const struct ggml_tensor * op) {
1341-
const struct ggml_backend_metal_device_props props = ggml_backend_metal_device_get_props(ctx_dev);
1342-
1343-
const bool has_simdgroup_mm = props.has_simdgroup_mm;
1344-
const bool has_simdgroup_reduction = props.has_simdgroup_reduction;
1345-
const bool use_bfloat = props.use_bfloat;
1348+
static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_props * props_dev, const struct ggml_tensor * op) {
1349+
const bool has_simdgroup_mm = props_dev->has_simdgroup_mm;
1350+
const bool has_simdgroup_reduction = props_dev->has_simdgroup_reduction;
1351+
const bool use_bfloat = props_dev->use_bfloat;
13461352

13471353
if (!use_bfloat) {
13481354
if (op->type == GGML_TYPE_BF16) {
@@ -1607,8 +1613,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
16071613

16081614
id<MTLComputeCommandEncoder> encoder = ctx_enc->encoder;
16091615

1610-
struct ggml_backend_metal_context * ctx = backend->context;
1611-
struct ggml_backend_metal_device * ctx_dev = backend->device->context;
1616+
struct ggml_backend_metal_context * ctx = backend->context;
16121617

16131618
struct ggml_cgraph * gf = ctx->gf;
16141619

@@ -1642,7 +1647,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
16421647
} break;
16431648
}
16441649

1645-
if (!ggml_metal_supports_op(ctx_dev, dst)) {
1650+
if (!ggml_metal_supports_op(&ctx->props_dev, dst)) {
16461651
GGML_LOG_ERROR("%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
16471652
GGML_ABORT("unsupported op");
16481653
}
@@ -1753,8 +1758,6 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
17531758
}
17541759
}
17551760

1756-
const struct ggml_backend_metal_device_props props = ggml_backend_metal_device_get_props(ctx_dev);
1757-
17581761
switch (dst->op) {
17591762
case GGML_OP_CONCAT:
17601763
{
@@ -3132,7 +3135,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
31323135
} else
31333136
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
31343137
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
3135-
if (props.supports_gpu_family_apple7 &&
3138+
if (ctx->props_dev.supports_gpu_family_apple7 &&
31363139
!ggml_is_transposed(src0) &&
31373140
!ggml_is_transposed(src1) &&
31383141
src1t == GGML_TYPE_F32 &&
@@ -3470,7 +3473,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
34703473

34713474
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
34723475
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
3473-
if (props.supports_gpu_family_apple7 &&
3476+
if (ctx->props_dev.supports_gpu_family_apple7 &&
34743477
ne00 % 32 == 0 && ne00 >= 64 &&
34753478
(ne21 >= ne21_mm_id_min)) {
34763479
GGML_ASSERT(ne00 % 4 == 0);
@@ -3519,7 +3522,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
35193522

35203523
const size_t smem = ne02*ne20*sizeof(uint16_t);
35213524

3522-
GGML_ASSERT(smem <= props.max_theadgroup_memory_size);
3525+
GGML_ASSERT(smem <= ctx->props_dev.max_theadgroup_memory_size);
35233526

35243527
[encoder setComputePipelineState:pipeline];
35253528
[encoder setBytes:&args length:sizeof(args) atIndex:0];
@@ -4704,7 +4707,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
47044707
// nsgmax = 2;
47054708
// while (true) {
47064709
// const size_t smem = FATTN_SMEM(nsgmax);
4707-
// if (smem > props.max_theadgroup_memory_size) {
4710+
// if (smem > ctx->props_dev.max_theadgroup_memory_size) {
47084711
// break;
47094712
// }
47104713
// nsgmax *= 2;
@@ -4772,8 +4775,8 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
47724775

47734776
[encoder setBuffer:id_dst offset:offs_dst atIndex:6];
47744777

4775-
//printf("smem: %zu, max: %zu, nsg = %d, ne02 = %d, ne12 = %d\n", smem, props.max_theadgroup_memory_size, (int) nsg, ne02, ne12);
4776-
GGML_ASSERT(smem <= props.max_theadgroup_memory_size);
4778+
//printf("smem: %zu, max: %zu, nsg = %d, ne02 = %d, ne12 = %d\n", smem, ctx->props_dev.max_theadgroup_memory_size, (int) nsg, ne02, ne12);
4779+
GGML_ASSERT(smem <= ctx->props_dev.max_theadgroup_memory_size);
47774780
[encoder setThreadgroupMemoryLength:smem atIndex:0];
47784781
[encoder dispatchThreadgroups:MTLSizeMake((ne01 + nqptg - 1)/nqptg, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(32, nsg, 1)];
47794782
#undef FATTN_SMEM
@@ -4800,7 +4803,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
48004803
while (true) {
48014804
const size_t smem = FATTN_SMEM(nsgmax);
48024805
// avoid using more than half of the threadgroup memory - can cause slow downs especially for large head sizes
4803-
if (smem > props.max_theadgroup_memory_size/2) {
4806+
if (smem > ctx->props_dev.max_theadgroup_memory_size/2) {
48044807
break;
48054808
}
48064809
nsgmax *= 2;
@@ -4889,8 +4892,8 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
48894892

48904893
const size_t smem = FATTN_SMEM(nsg);
48914894

4892-
//printf("smem: %zu, max: %zu, nsg = %d, nsgmax = %d\n", smem, props.max_theadgroup_memory_size, (int) nsg, (int) nsgmax);
4893-
GGML_ASSERT(smem <= props.max_theadgroup_memory_size);
4895+
//printf("smem: %zu, max: %zu, nsg = %d, nsgmax = %d\n", smem, ctx->props_dev.max_theadgroup_memory_size, (int) nsg, (int) nsgmax);
4896+
GGML_ASSERT(smem <= ctx->props_dev.max_theadgroup_memory_size);
48944897

48954898
if (nwg == 1) {
48964899
// using 1 workgroup -> write the result directly into dst
@@ -5666,12 +5669,12 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
56665669
size_aligned += (size_page - (size_aligned % size_page));
56675670
}
56685671

5669-
struct ggml_backend_metal_device * ctx_dev = (struct ggml_backend_metal_device *)buft->device->context;
5672+
ggml_backend_metal_device_t ctx_dev = buft->device->context;
56705673

5671-
const struct ggml_backend_metal_device_props props = ggml_backend_metal_device_get_props(ctx_dev);
5674+
const struct ggml_backend_metal_device_props props_dev = ggml_backend_metal_device_get_props(ctx_dev);
56725675

56735676
// allocate shared buffer if the device supports it and it is required by the buffer type
5674-
if (props.use_shared_buffers && shared) {
5677+
if (props_dev.use_shared_buffers && shared) {
56755678
ctx->all_data = ggml_metal_host_malloc(size_aligned);
56765679
ctx->is_shared = true;
56775680
} else {
@@ -5682,7 +5685,7 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
56825685
ctx->all_size = size_aligned;
56835686

56845687
ctx->device = ggml_backend_metal_device_get_device(ctx_dev);
5685-
ctx->queue = ggml_backend_metal_device_get_queue(ctx_dev);
5688+
ctx->queue = ggml_backend_metal_device_get_queue (ctx_dev);
56865689

56875690
ctx->n_buffers = 1;
56885691

@@ -5691,7 +5694,7 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
56915694
ctx->buffers[0].metal = nil;
56925695

56935696
if (size_aligned > 0) {
5694-
if (props.use_shared_buffers) {
5697+
if (props_dev.use_shared_buffers) {
56955698
ctx->buffers[0].metal = [ctx->device newBufferWithBytesNoCopy:ctx->all_data
56965699
length:size_aligned
56975700
options:MTLResourceStorageModeShared
@@ -5712,7 +5715,7 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
57125715
return NULL;
57135716
}
57145717

5715-
ctx->use_residency_sets = props.use_residency_sets;
5718+
ctx->use_residency_sets = props_dev.use_residency_sets;
57165719

57175720
if (!ggml_backend_metal_buffer_rset_init(ctx)) {
57185721
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
@@ -5771,7 +5774,7 @@ static size_t ggml_backend_metal_buffer_type_shared_get_alignment(ggml_backend_b
57715774
}
57725775

57735776
static size_t ggml_backend_metal_buffer_type_shared_get_max_size(ggml_backend_buffer_type_t buft) {
5774-
return ggml_backend_metal_device_get_props(((struct ggml_backend_metal_device *)buft->device->context)).max_buffer_size;
5777+
return ggml_backend_metal_device_get_props(buft->device->context).max_buffer_size;
57755778
}
57765779

57775780
static size_t ggml_backend_metal_buffer_type_shared_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor) {
@@ -5820,7 +5823,7 @@ static size_t ggml_backend_metal_buffer_type_private_get_alignment(ggml_backend_
58205823
}
58215824

58225825
static size_t ggml_backend_metal_buffer_type_private_get_max_size(ggml_backend_buffer_type_t buft) {
5823-
return ggml_backend_metal_device_get_props(((struct ggml_backend_metal_device *)buft->device->context)).max_buffer_size;
5826+
return ggml_backend_metal_device_get_props(buft->device->context).max_buffer_size;
58245827
}
58255828

58265829
static size_t ggml_backend_metal_buffer_type_private_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor) {
@@ -5870,7 +5873,7 @@ static size_t ggml_backend_metal_buffer_type_mapped_get_alignment(ggml_backend_b
58705873
}
58715874

58725875
static size_t ggml_backend_metal_buffer_type_mapped_get_max_size(ggml_backend_buffer_type_t buft) {
5873-
return ggml_backend_metal_device_get_props(((struct ggml_backend_metal_device *)buft->device->context)).max_buffer_size;
5876+
return ggml_backend_metal_device_get_props(buft->device->context).max_buffer_size;
58745877
}
58755878

58765879
static size_t ggml_backend_metal_buffer_type_mapped_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor) {
@@ -6235,15 +6238,11 @@ void ggml_backend_metal_capture_next_compute(ggml_backend_t backend) {
62356238
}
62366239

62376240
static const char * ggml_backend_metal_device_get_description(ggml_backend_dev_t dev) {
6238-
struct ggml_backend_metal_device * ctx_dev = (struct ggml_backend_metal_device *)dev->context;
6239-
6240-
return ggml_backend_metal_device_get_props(ctx_dev).name;
6241+
return ggml_backend_metal_device_get_props(dev->context).name;
62416242
}
62426243

62436244
static void ggml_backend_metal_device_get_memory_ext(ggml_backend_dev_t dev, size_t * free, size_t * total) {
6244-
struct ggml_backend_metal_device * ctx_dev = (struct ggml_backend_metal_device *)dev->context;
6245-
6246-
ggml_backend_metal_device_get_memory(ctx_dev, free, total);
6245+
ggml_backend_metal_device_get_memory(dev->context, free, total);
62476246
}
62486247

62496248
static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backend_dev_t dev) {
@@ -6291,11 +6290,9 @@ static ggml_backend_t ggml_backend_metal_device_init_ext(ggml_backend_dev_t dev,
62916290
}
62926291

62936292
static ggml_backend_buffer_type_t ggml_backend_metal_device_get_buffer_type(ggml_backend_dev_t dev) {
6294-
struct ggml_backend_metal_device * ctx_dev = dev->context;
6295-
6296-
const struct ggml_backend_metal_device_props props = ggml_backend_metal_device_get_props(ctx_dev);
6293+
const struct ggml_backend_metal_device_props props_dev = ggml_backend_metal_device_get_props(dev->context);
62976294

6298-
return props.use_shared_buffers ? ggml_backend_metal_buffer_type_shared() : ggml_backend_metal_buffer_type_private();
6295+
return props_dev.use_shared_buffers ? ggml_backend_metal_buffer_type_shared() : ggml_backend_metal_buffer_type_private();
62996296
}
63006297

63016298
static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
@@ -6322,15 +6319,13 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backen
63226319
size_aligned += (size_page - (size_aligned % size_page));
63236320
}
63246321

6325-
struct ggml_backend_metal_device * ctx_dev = (struct ggml_backend_metal_device *)dev->context;
6326-
6327-
ctx->device = ggml_backend_metal_device_get_device(ctx_dev);
6328-
ctx->queue = ggml_backend_metal_device_get_queue(ctx_dev);
6322+
ctx->device = ggml_backend_metal_device_get_device(dev->context);
6323+
ctx->queue = ggml_backend_metal_device_get_queue (dev->context);
63296324

6330-
const struct ggml_backend_metal_device_props props = ggml_backend_metal_device_get_props(ctx_dev);
6325+
const struct ggml_backend_metal_device_props props_dev = ggml_backend_metal_device_get_props(dev->context);
63316326

63326327
// the buffer fits into the max buffer size allowed by the device
6333-
if (size_aligned <= props.max_buffer_size) {
6328+
if (size_aligned <= props_dev.max_buffer_size) {
63346329
ctx->buffers[ctx->n_buffers].data = ptr;
63356330
ctx->buffers[ctx->n_buffers].size = size;
63366331
ctx->buffers[ctx->n_buffers].metal = nil;
@@ -6351,8 +6346,8 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backen
63516346
// this overlap between the views will guarantee that the tensor with the maximum size will fully fit into
63526347
// one of the views
63536348
const size_t size_ovlp = ((max_tensor_size + size_page - 1) / size_page + 1) * size_page; // round-up 2 pages just in case
6354-
const size_t size_step = props.max_buffer_size - size_ovlp;
6355-
const size_t size_view = props.max_buffer_size;
6349+
const size_t size_step = props_dev.max_buffer_size - size_ovlp;
6350+
const size_t size_view = props_dev.max_buffer_size;
63566351

63576352
for (size_t i = 0; i < size; i += size_step) {
63586353
const size_t size_step_aligned = (i + size_view <= size) ? size_view : (size_aligned - i);
@@ -6380,7 +6375,7 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backen
63806375
}
63816376
}
63826377

6383-
ctx->use_residency_sets = props.use_residency_sets;
6378+
ctx->use_residency_sets = props_dev.use_residency_sets;
63846379

63856380
if (!ggml_backend_metal_buffer_rset_init(ctx)) {
63866381
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
@@ -6392,9 +6387,9 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_mapped(ggml_backen
63926387
}
63936388

63946389
static bool ggml_backend_metal_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
6395-
struct ggml_backend_metal_device * ctx_dev = dev->context;
6390+
const struct ggml_backend_metal_device_props props_dev = ggml_backend_metal_device_get_props(dev->context);
63966391

6397-
return ggml_metal_supports_op(ctx_dev, op);
6392+
return ggml_metal_supports_op(&props_dev, op);
63986393
}
63996394

64006395
static bool ggml_backend_metal_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {

0 commit comments

Comments (0)