@@ -466,11 +466,11 @@ ggml_metal_t ggml_metal_init(ggml_metal_device_t ctx_dev) {
 
     res->ctx_dev = ctx_dev;
 
-    struct ggml_metal_device_props props_dev = ggml_metal_device_get_props(ctx_dev);
+    const struct ggml_metal_device_props * props_dev = ggml_metal_device_get_props(ctx_dev);
 
     res->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
 
-    res->use_bfloat      = props_dev.has_bfloat;
+    res->use_bfloat      = props_dev->has_bfloat;
     res->use_fusion      = getenv("GGML_METAL_FUSION_DISABLE") == nil;
     res->use_concurrency = getenv("GGML_METAL_CONCURRENCY_DISABLE") == nil;
 
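
For context on the hunk above: `ggml_metal_device_get_props` now returns a `const` pointer instead of a by-value copy, so every call site reads the single device-owned props struct and the `.` accesses become `->`. A minimal sketch of the getter this implies, assuming the device struct stores its props in a `props` member (the member name is an assumption, not shown in this diff):

    // Sketch only: hand out a read-only view of the device-owned props
    // instead of copying the whole struct on every call.
    const struct ggml_metal_device_props * ggml_metal_device_get_props(ggml_metal_device_t dev) {
        return &dev->props; // assumed member; valid for the device's lifetime
    }
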
@@ -540,9 +540,9 @@ ggml_metal_t ggml_metal_init(ggml_metal_device_t ctx_dev) {
         GGML_LOG_WARN("%s: skipping %-40s (not supported)\n", __func__, "kernel_" #name); \
     }
 
-    const bool has_simdgroup_mm        = props_dev.has_simdgroup_mm;
-    const bool has_simdgroup_reduction = props_dev.has_simdgroup_reduction;
-    const bool has_bfloat              = props_dev.has_bfloat;
+    const bool has_simdgroup_mm        = props_dev->has_simdgroup_mm;
+    const bool has_simdgroup_reduction = props_dev->has_simdgroup_reduction;
+    const bool has_bfloat              = props_dev->has_bfloat;
 
     // simd_sum and simd_max requires MTLGPUFamilyApple7
 
@@ -1258,7 +1258,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
         }
     }
 
-    struct ggml_metal_device_props props_dev = ggml_metal_device_get_props(ctx->ctx_dev);
+    const struct ggml_metal_device_props * props_dev = ggml_metal_device_get_props(ctx->ctx_dev);
 
     switch (dst->op) {
         case GGML_OP_CONCAT:
@@ -2637,7 +2637,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
             } else
             // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
             // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
-            if (props_dev.supports_gpu_family_apple7 &&
+            if (props_dev->supports_gpu_family_apple7 &&
                 !ggml_is_transposed(src0) &&
                 !ggml_is_transposed(src1) &&
                 src1t == GGML_TYPE_F32 &&
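
The `supports_gpu_family_apple7` flag is what encodes the A14+/M1+ requirement mentioned in the comment. For reference, such a flag would typically be filled from Metal's GPU-family query; an illustrative sketch (the query is standard Metal API, but the assignment site is an assumption about where the props get populated):

    // Illustrative only: MTLGPUFamilyApple7 corresponds to A14/M1-class
    // GPUs, which is what the simdgroup matrix kernels need.
    props.supports_gpu_family_apple7 = [device supportsFamily:MTLGPUFamilyApple7];
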
@@ -2975,7 +2975,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
 
             // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
             // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
-            if (props_dev.supports_gpu_family_apple7 &&
+            if (props_dev->supports_gpu_family_apple7 &&
                 ne00 % 32 == 0 && ne00 >= 64 &&
                 (ne21 >= ne21_mm_id_min)) {
                 GGML_ASSERT(ne00 % 4 == 0);
@@ -3024,7 +3024,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
 
                 const size_t smem = ne02*ne20*sizeof(uint16_t);
 
-                GGML_ASSERT(smem <= props_dev.max_theadgroup_memory_size);
+                GGML_ASSERT(smem <= props_dev->max_theadgroup_memory_size);
 
                 [encoder setComputePipelineState:pipeline];
                 [encoder setBytes:&args length:sizeof(args) atIndex:0];
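
`max_theadgroup_memory_size` (the field name's spelling is as in the source) is the per-threadgroup shared-memory budget that these asserts check before each `setThreadgroupMemoryLength:atIndex:` call. It would typically be taken from the Metal device limit; a hedged sketch, with the assignment site assumed:

    // Illustrative only: maxThreadgroupMemoryLength is the real MTLDevice
    // property that caps threadgroup (shared) memory per dispatch.
    props.max_theadgroup_memory_size = device.maxThreadgroupMemoryLength;
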
@@ -4209,7 +4209,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
                 // nsgmax = 2;
                 // while (true) {
                 //     const size_t smem = FATTN_SMEM(nsgmax);
-                //     if (smem > props_dev.max_theadgroup_memory_size) {
+                //     if (smem > props_dev->max_theadgroup_memory_size) {
                 //         break;
                 //     }
                 //     nsgmax *= 2;
@@ -4277,8 +4277,8 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
 
                 [encoder setBuffer:id_dst offset:offs_dst atIndex:6];
 
-                // printf("smem: %zu, max: %zu, nsg = %d, ne02 = %d, ne12 = %d\n", smem, props_dev.max_theadgroup_memory_size, (int) nsg, ne02, ne12);
-                GGML_ASSERT(smem <= props_dev.max_theadgroup_memory_size);
+                // printf("smem: %zu, max: %zu, nsg = %d, ne02 = %d, ne12 = %d\n", smem, props_dev->max_theadgroup_memory_size, (int) nsg, ne02, ne12);
+                GGML_ASSERT(smem <= props_dev->max_theadgroup_memory_size);
                 [encoder setThreadgroupMemoryLength:smem atIndex:0];
                 [encoder dispatchThreadgroups:MTLSizeMake((ne01 + nqptg - 1)/nqptg, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(32, nsg, 1)];
 #undef FATTN_SMEM
@@ -4305,7 +4305,7 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
                 while (true) {
                     const size_t smem = FATTN_SMEM(nsgmax);
                     // avoid using more than half of the threadgroup memory - can cause slow downs especially for large head sizes
-                    if (smem > props_dev.max_theadgroup_memory_size/2) {
+                    if (smem > props_dev->max_theadgroup_memory_size/2) {
                         break;
                     }
                     nsgmax *= 2;
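
The loop in this hunk doubles the candidate simdgroup count until the flash-attention shared-memory estimate would exceed half the device limit, the halved bound being a deliberate margin against the slowdowns the comment mentions. A standalone sketch of the same capping pattern (`FATTN_SMEM` stands in for the kernel's shared-memory formula, which this diff does not show; the back-off after the loop is an assumption about the surrounding code):

    // Sketch: grow the simdgroup budget geometrically while the estimate
    // fits in half the threadgroup memory, then back off one step.
    int32_t nsgmax = 2;
    while (true) {
        const size_t smem = FATTN_SMEM(nsgmax);
        if (smem > props_dev->max_theadgroup_memory_size/2) {
            break;
        }
        nsgmax *= 2;
    }
    nsgmax /= 2; // assumed back-off to the last size that fit
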
@@ -4394,8 +4394,8 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
 
                 const size_t smem = FATTN_SMEM(nsg);
 
-                // printf("smem: %zu, max: %zu, nsg = %d, nsgmax = %d\n", smem, props_dev.max_theadgroup_memory_size, (int) nsg, (int) nsgmax);
-                GGML_ASSERT(smem <= props_dev.max_theadgroup_memory_size);
+                // printf("smem: %zu, max: %zu, nsg = %d, nsgmax = %d\n", smem, props_dev->max_theadgroup_memory_size, (int) nsg, (int) nsgmax);
+                GGML_ASSERT(smem <= props_dev->max_theadgroup_memory_size);
 
                 if (nwg == 1) {
                     // using 1 workgroup -> write the result directly into dst