@@ -449,7 +449,7 @@ static void ggml_backend_metal_buffer_rset_free(ggml_backend_metal_buffer_t ctx)
     return data;
 }
 
-ggml_backend_metal_buffer_t ggml_backend_metal_buffer_init(ggml_backend_metal_device_t device, size_t size, bool shared) {
+ggml_backend_metal_buffer_t ggml_backend_metal_buffer_init(ggml_backend_metal_device_t ctx, size_t size, bool shared) {
     ggml_backend_metal_buffer_t res = calloc(1, sizeof(struct ggml_backend_metal_buffer));
 
     const size_t size_page = sysconf(_SC_PAGESIZE);
@@ -459,7 +459,7 @@ ggml_backend_metal_buffer_t ggml_backend_metal_buffer_init(ggml_backend_metal_de
         size_aligned += (size_page - (size_aligned % size_page));
     }
 
-    const struct ggml_backend_metal_device_props props_dev = ggml_backend_metal_device_get_props(device);
+    const struct ggml_backend_metal_device_props props_dev = ggml_backend_metal_device_get_props(ctx);
 
     shared = shared && props_dev.use_shared_buffers;
 
@@ -474,8 +474,8 @@ ggml_backend_metal_buffer_t ggml_backend_metal_buffer_init(ggml_backend_metal_de
     }
     res->all_size = size_aligned;
 
-    res->device = ggml_backend_metal_device_get_device(device);
-    res->queue  = ggml_backend_metal_device_get_queue (device);
+    res->device = ggml_backend_metal_device_get_device(ctx);
+    res->queue  = ggml_backend_metal_device_get_queue (ctx);
 
     res->n_buffers = 1;
 
@@ -518,7 +518,7 @@ ggml_backend_metal_buffer_t ggml_backend_metal_buffer_init(ggml_backend_metal_de
     return res;
 }
 
-ggml_backend_metal_buffer_t ggml_backend_metal_buffer_map(ggml_backend_metal_device_t device, void * ptr, size_t size, size_t max_tensor_size) {
+ggml_backend_metal_buffer_t ggml_backend_metal_buffer_map(ggml_backend_metal_device_t ctx, void * ptr, size_t size, size_t max_tensor_size) {
     ggml_backend_metal_buffer_t res = calloc(1, sizeof(struct ggml_backend_metal_buffer));
 
     res->all_data = ptr;
@@ -542,10 +542,10 @@ ggml_backend_metal_buffer_t ggml_backend_metal_buffer_map(ggml_backend_metal_dev
         size_aligned += (size_page - (size_aligned % size_page));
     }
 
-    res->device = ggml_backend_metal_device_get_device(device);
-    res->queue  = ggml_backend_metal_device_get_queue (device);
+    res->device = ggml_backend_metal_device_get_device(ctx);
+    res->queue  = ggml_backend_metal_device_get_queue (ctx);
 
-    const struct ggml_backend_metal_device_props props_dev = ggml_backend_metal_device_get_props(device);
+    const struct ggml_backend_metal_device_props props_dev = ggml_backend_metal_device_get_props(ctx);
 
     // the buffer fits into the max buffer size allowed by the device
     if (size_aligned <= props_dev.max_buffer_size) {
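
When the mapped region does not fit, the elided else-branch (old lines 552-610) splits it into several overlapping views, sized so that any single tensor (bounded by max_tensor_size) lands entirely inside one view. A rough sketch of that chunking arithmetic; the variable names are assumed here, not copied from the elided code:

    // sketch only: split an oversized host mapping into overlapping, device-sized views
    const size_t size_ovlp = ((max_tensor_size + size_page - 1) / size_page + 1) * size_page; // page-aligned overlap
    const size_t size_step = props_dev.max_buffer_size - size_ovlp;

    for (size_t i = 0; i < size; i += size_step) {
        // the last view may be shorter than the device maximum
        const size_t size_view = i + props_dev.max_buffer_size <= size_aligned ? props_dev.max_buffer_size : size_aligned - i;

        // wrap (char *) ptr + i in a no-copy MTLBuffer of length size_view and
        // append it to res->buffers[res->n_buffers++]
    }
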
@@ -611,44 +611,44 @@ ggml_backend_metal_buffer_t ggml_backend_metal_buffer_map(ggml_backend_metal_dev
     return res;
 }
 
-void ggml_backend_metal_buffer_free(ggml_backend_metal_buffer_t buffer) {
-    for (int i = 0; i < buffer->n_buffers; i++) {
-        [buffer->buffers[i].metal release];
+void ggml_backend_metal_buffer_free(ggml_backend_metal_buffer_t ctx) {
+    for (int i = 0; i < ctx->n_buffers; i++) {
+        [ctx->buffers[i].metal release];
     }
 
-    ggml_backend_metal_buffer_rset_free(buffer);
+    ggml_backend_metal_buffer_rset_free(ctx);
 
-    if (buffer->is_shared) {
+    if (ctx->is_shared) {
 #if TARGET_OS_OSX
-        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)buffer->all_data, buffer->all_size);
+        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ctx->all_data, ctx->all_size);
 #else
-        free(buffer->all_data);
+        free(ctx->all_data);
 #endif
     }
 
-    free(buffer);
+    free(ctx);
 }
 
-void * ggml_backend_metal_buffer_get_base(ggml_backend_metal_buffer_t buffer) {
-    return buffer->all_data;
+void * ggml_backend_metal_buffer_get_base(ggml_backend_metal_buffer_t ctx) {
+    return ctx->all_data;
 }
 
-bool ggml_backend_metal_buffer_is_shared(ggml_backend_metal_buffer_t buffer) {
-    return buffer->is_shared;
+bool ggml_backend_metal_buffer_is_shared(ggml_backend_metal_buffer_t ctx) {
+    return ctx->is_shared;
 }
 
-void ggml_backend_metal_buffer_memset_tensor(ggml_backend_metal_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
-    if (buffer->is_shared) {
+void ggml_backend_metal_buffer_memset_tensor(ggml_backend_metal_buffer_t ctx, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
+    if (ctx->is_shared) {
         memset((char *)tensor->data + offset, value, size);
         return;
     }
 
     @autoreleasepool {
         // dst
-        struct ggml_backend_metal_buffer_id buf_dst = ggml_backend_metal_buffer_get_id(buffer, tensor);
+        struct ggml_backend_metal_buffer_id buf_dst = ggml_backend_metal_buffer_get_id(ctx, tensor);
         buf_dst.offs += offset;
 
-        id<MTLCommandQueue> queue = buffer->queue;
+        id<MTLCommandQueue> queue = ctx->queue;
         id<MTLCommandBuffer> cmd_buf = [queue commandBufferWithUnretainedReferences];
 
         {
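
The elided encoder body here (old lines 655-665) presumably mirrors ggml_backend_metal_buffer_clear further down: a blit fillBuffer over the tensor's byte range, then commit and wait. A minimal sketch under that assumption, not the verbatim elided code:

    id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];

    // fill the tensor's byte range inside its containing view
    [encoder fillBuffer:buf_dst.metal
                  range:NSMakeRange(buf_dst.offs, size)
                  value:value];

    [encoder endEncoding];

    [cmd_buf commit];
    [cmd_buf waitUntilCompleted];
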
@@ -666,29 +666,29 @@ void ggml_backend_metal_buffer_memset_tensor(ggml_backend_metal_buffer_t buffer,
     }
 }
 
-void ggml_backend_metal_buffer_set_tensor(ggml_backend_metal_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
-    if (buffer->is_shared) {
+void ggml_backend_metal_buffer_set_tensor(ggml_backend_metal_buffer_t ctx, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+    if (ctx->is_shared) {
         memcpy((char *)tensor->data + offset, data, size);
         return;
     }
 
     @autoreleasepool {
         // src
         void * data_ptr = (void *)(uintptr_t) data; // "const cast" the src data
-        id<MTLBuffer> buf_src = [buffer->device newBufferWithBytesNoCopy:data_ptr
+        id<MTLBuffer> buf_src = [ctx->device newBufferWithBytesNoCopy:data_ptr
                                                                   length:size
                                                                  options:MTLResourceStorageModeShared
                                                              deallocator:nil];
 
         // dst
-        struct ggml_backend_metal_buffer_id buf_dst = ggml_backend_metal_buffer_get_id(buffer, tensor);
+        struct ggml_backend_metal_buffer_id buf_dst = ggml_backend_metal_buffer_get_id(ctx, tensor);
         buf_dst.offs += offset;
 
         // note: for experimentation purposes, here we use a semaphore to wait for the copy to complete
         // this is an alternative to waitUntilCompleted, which should be faster, but doesn't seem to make much difference
         dispatch_semaphore_t completion_semaphore = dispatch_semaphore_create(0);
 
-        id<MTLCommandQueue> queue = buffer->queue;
+        id<MTLCommandQueue> queue = ctx->queue;
         id<MTLCommandBuffer> cmd_buf = [queue commandBufferWithUnretainedReferences];
 
         {
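
The elided body (old lines 695-716) encodes the blit copy from buf_src into buf_dst and then blocks on completion_semaphore instead of calling waitUntilCompleted. The pattern the note above refers to looks roughly like this (a sketch, not the verbatim elided code):

    [cmd_buf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
        // runs on Metal's completion thread once the GPU finishes the copy
        dispatch_semaphore_signal(completion_semaphore);
    }];

    [cmd_buf commit];

    // block the calling thread until the completion handler fires
    dispatch_semaphore_wait(completion_semaphore, DISPATCH_TIME_FOREVER);
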
@@ -717,24 +717,24 @@ void ggml_backend_metal_buffer_set_tensor(ggml_backend_metal_buffer_t buffer, st
     }
 }
 
-void ggml_backend_metal_buffer_get_tensor(ggml_backend_metal_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
-    if (buffer->is_shared) {
+void ggml_backend_metal_buffer_get_tensor(ggml_backend_metal_buffer_t ctx, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
+    if (ctx->is_shared) {
         memcpy(data, (const char *)tensor->data + offset, size);
         return;
     }
 
     @autoreleasepool {
         // src
-        struct ggml_backend_metal_buffer_id buf_src = ggml_backend_metal_buffer_get_id(buffer, tensor);
+        struct ggml_backend_metal_buffer_id buf_src = ggml_backend_metal_buffer_get_id(ctx, tensor);
         buf_src.offs += offset;
 
         // dst
-        id<MTLBuffer> buf_dst = [buffer->device newBufferWithBytesNoCopy:data
-                                                                  length:size
-                                                                 options:MTLResourceStorageModeShared
-                                                             deallocator:nil];
+        id<MTLBuffer> buf_dst = [ctx->device newBufferWithBytesNoCopy:data
+                                                               length:size
+                                                              options:MTLResourceStorageModeShared
+                                                          deallocator:nil];
 
-        id<MTLCommandQueue> queue = buffer->queue;
+        id<MTLCommandQueue> queue = ctx->queue;
         id<MTLCommandBuffer> cmd_buf = [queue commandBufferWithUnretainedReferences];
 
         {
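
The elided body (old lines 741-753) presumably performs the device-to-host blit: copy size bytes from buf_src at its computed offset into the wrapped destination buffer, then wait for the GPU. A sketch under that assumption:

    id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];

    [encoder copyFromBuffer:buf_src.metal
               sourceOffset:buf_src.offs
                   toBuffer:buf_dst
          destinationOffset:0
                       size:size];

    [encoder endEncoding];

    [cmd_buf commit];
    [cmd_buf waitUntilCompleted];
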
@@ -754,21 +754,21 @@ void ggml_backend_metal_buffer_get_tensor(ggml_backend_metal_buffer_t buffer, co
     }
 }
 
-void ggml_backend_metal_buffer_clear(ggml_backend_metal_buffer_t buffer, uint8_t value) {
-    if (buffer->is_shared) {
-        memset(buffer->all_data, value, buffer->all_size);
+void ggml_backend_metal_buffer_clear(ggml_backend_metal_buffer_t ctx, uint8_t value) {
+    if (ctx->is_shared) {
+        memset(ctx->all_data, value, ctx->all_size);
         return;
     }
 
     @autoreleasepool {
-        id<MTLCommandQueue> queue = buffer->queue;
+        id<MTLCommandQueue> queue = ctx->queue;
         id<MTLCommandBuffer> cmd_buf = [queue commandBufferWithUnretainedReferences];
 
         {
             id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];
 
-            [encoder fillBuffer:buffer->buffers[0].metal
-                          range:NSMakeRange(0, buffer->buffers[0].size)
+            [encoder fillBuffer:ctx->buffers[0].metal
+                          range:NSMakeRange(0, ctx->buffers[0].size)
                           value:value];
 
             [encoder endEncoding];
@@ -779,18 +779,18 @@ void ggml_backend_metal_buffer_clear(ggml_backend_metal_buffer_t buffer, uint8_t
     }
 }
 
-struct ggml_backend_metal_buffer_id ggml_backend_metal_buffer_get_id(ggml_backend_metal_buffer_t buf, const struct ggml_tensor * t) {
+struct ggml_backend_metal_buffer_id ggml_backend_metal_buffer_get_id(ggml_backend_metal_buffer_t ctx, const struct ggml_tensor * t) {
     struct ggml_backend_metal_buffer_id res = { nil, 0 };
 
     const int64_t tsize = ggml_nbytes(t);
 
     // find the view that contains the tensor fully
-    for (int i = 0; i < buf->n_buffers; ++i) {
-        const int64_t ioffs = (int64_t) t->data - (int64_t) buf->buffers[i].data;
+    for (int i = 0; i < ctx->n_buffers; ++i) {
+        const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data;
 
-        // GGML_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf->buffers[i].size);
-        if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf->buffers[i].size) {
-            res.metal = buf->buffers[i].metal;
+        // GGML_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, ctx->buffers[i].size);
+        if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
+            res.metal = ctx->buffers[i].metal;
             res.offs = (size_t) ioffs;
 
             // GGML_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
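
The change is a pure rename of the first parameter (device, buffer, buf -> ctx); behavior and call sites are unchanged. For illustration, a hypothetical caller of the renamed API, where dev and the sizes are placeholders rather than anything taken from the diff:

    // allocate a page-aligned buffer, preferring shared (host-visible) memory when supported
    ggml_backend_metal_buffer_t buf = ggml_backend_metal_buffer_init(dev, 16*1024*1024, /*shared =*/ true);

    // host-visible base pointer and zero-initialization
    void * base = ggml_backend_metal_buffer_get_base(buf);
    ggml_backend_metal_buffer_clear(buf, 0);
    (void) base; // e.g. copy tensor data here

    // release the Metal views and the backing allocation
    ggml_backend_metal_buffer_free(buf);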