3535#include <omp.h>
3636#endif
3737
38- #ifdef GGML_USE_METAL
39- #include <unistd.h>
40- #endif
41- 
4238#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
4339#undef GGML_USE_LLAMAFILE
4440#endif
@@ -189,6 +185,8 @@ typedef pthread_t ggml_thread_t;
189185#endif
190186
191187#if defined(__APPLE__)
188+ #include <unistd.h>
189+ #include <mach/mach.h>
192190#include <TargetConditionals.h>
193191#endif
194192
@@ -386,22 +384,40 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi
386384//#define GGML_SOFT_MAX_ACCELERATE
387385#endif
388386
387+ 
// NOTE(review): this span is a unified-diff rendering; '+' lines are the new
// implementation of ggml_aligned_malloc, '-' lines the removed macro-based
// GGML_ALIGNED_MALLOC/GGML_ALIGNED_FREE version it replaces.
// Purpose of the new function: allocate `size` bytes aligned for tensor use,
// selecting a platform-specific allocator. Returns NULL on failure or size==0.
388+ void * ggml_aligned_malloc(size_t size) {
// Windows toolchains: CRT aligned allocator; alignment comes from
// TENSOR_ALIGNMENT (value defined elsewhere in the project — not visible here).
389389#if defined(_MSC_VER) || defined(__MINGW32__)
390- #define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
391- #define GGML_ALIGNED_FREE(ptr)    _aligned_free(ptr)
390+     return _aligned_malloc(size, TENSOR_ALIGNMENT);
392391#else
393- inline static void * ggml_aligned_malloc(size_t size) {
// A 0-byte request is allowed but warned about: the caller gets NULL back.
394392    if (size == 0) {
395393        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
396394        return NULL;
397395    }
398396    void * aligned_memory = NULL;
// High-bandwidth memory build: allocate from the HBM pool (memkind).
399397#ifdef GGML_USE_CPU_HBM
400-     int result = hbw_posix_memalign(&aligned_memory, 16, size);
398+     int result = hbw_posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size);
// macOS: use Mach vm_allocate (page-aligned by construction) and translate
// the kern_return_t status into the errno-style codes the caller expects.
399+ #elif TARGET_OS_OSX
400+     kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
401+     int result = EFAULT;
402+     switch (alloc_status) {
403+         case KERN_SUCCESS:
404+             result = 0;
405+             break;
406+         case KERN_INVALID_ADDRESS:
407+             result = EINVAL;
408+             break;
409+         case KERN_NO_SPACE:
410+             result = ENOMEM;
411+             break;
412+         default:
413+             result = EFAULT;
414+             break;
415+     }
// Metal (non-OSX Apple targets after the ordering above): page-size alignment
// is required for Metal buffer mapping; take the max of it and the tensor
// alignment so both constraints hold.
401416#elif GGML_USE_METAL
402-     int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size);
417+     const long page_size = sysconf(_SC_PAGESIZE);
418+     int result = posix_memalign(&aligned_memory, MAX(TENSOR_ALIGNMENT, page_size), size);
// Generic POSIX fallback.
403419#else
404-     int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN , size);
420+     int result = posix_memalign(&aligned_memory, TENSOR_ALIGNMENT , size);
405421#endif
406422    if (result != 0) {
407423        // Handle allocation failure
// NOTE(review): the hunk below elides the error-reporting lines of this
// branch (old lines ~419-434); presumably an error is logged before the
// NULL return — confirm against the full file.
@@ -419,14 +435,26 @@ inline static void * ggml_aligned_malloc(size_t size) {
419435        return NULL;
420436    }
421437    return aligned_memory;
438+ #endif
422439}
423- #define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size)
424- #ifdef GGML_USE_CPU_HBM
425- #define GGML_ALIGNED_FREE(ptr)    if(NULL != ptr) hbw_free(ptr)
440+ 
// Free a buffer obtained from ggml_aligned_malloc.
//
// ggml_aligned_malloc picks a platform-specific allocator, so the matching
// deallocator must be selected with the same preprocessor logic:
//   - MSVC / MinGW:       _aligned_malloc   -> _aligned_free
//   - GGML_USE_CPU_HBM:   hbw_posix_memalign-> hbw_free
//   - TARGET_OS_OSX:      vm_allocate       -> vm_deallocate (needs length)
//   - everything else:    posix_memalign    -> free
//
// `size` must be the size originally requested; it is consumed only on the
// macOS path, where vm_deallocate() takes an explicit region length.
// Passing ptr == NULL is a no-op on every path.
void ggml_aligned_free(void * ptr, size_t size) {
    (void)(size); // only the macOS branch reads size (GGML_UNUSED equivalent)
#if defined(_MSC_VER) || defined(__MINGW32__)
    _aligned_free(ptr);
#elif GGML_USE_CPU_HBM
    // hbw_free(NULL) is a defined no-op, mirroring free(); no guard needed.
    hbw_free(ptr);
#elif TARGET_OS_OSX
    // vm_deallocate has no NULL special case, so the guard is required here.
    if (ptr != NULL) {
        vm_deallocate((vm_map_t) mach_task_self(), (vm_address_t) ptr, size);
    }
#else
    free(ptr);
#endif
}
457+ 
430458
431459inline static void * ggml_malloc(size_t size) {
432460    if (size == 0) {
@@ -3869,7 +3897,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
38693897
38703898    *ctx = (struct ggml_context) {
38713899        /*.mem_size           =*/ mem_size,
3872-         /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC (mem_size),
3900+         /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc (mem_size),
38733901        /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
38743902        /*.no_alloc           =*/ params.no_alloc,
38753903        /*.no_alloc_save      =*/ params.no_alloc,
@@ -3909,7 +3937,7 @@ void ggml_free(struct ggml_context * ctx) {
39093937                    __func__, i, ggml_used_mem(ctx));
39103938
39113939            if (ctx->mem_buffer_owned) {
3912-                 GGML_ALIGNED_FREE (ctx->mem_buffer);
3940+                 ggml_aligned_free (ctx->mem_buffer, ctx->mem_size );
39133941            }
39143942
39153943            found = true;
@@ -19608,9 +19636,10 @@ static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask
// NOTE(review): unified-diff rendering. The change hoists n_threads_max out
// of the #ifndef GGML_USE_OPENMP region so workers_size can be computed for
// the sized ggml_aligned_free() calls in BOTH builds (previously n_threads
// only existed in the non-OpenMP build).
1960819636void ggml_threadpool_free(struct ggml_threadpool* threadpool) {
1960919637    if (!threadpool) return;
1961019638
// Read the worker count before any teardown; needed below for workers_size.
19639+     const int n_threads = threadpool->n_threads_max;
19640+ 
1961119641#ifndef GGML_USE_OPENMP
1961219642    struct ggml_compute_state* workers = threadpool->workers;
19613-     const int n_threads = threadpool->n_threads_max;
1961419643
1961519644    ggml_mutex_lock(&threadpool->mutex);
1961619645
// NOTE(review): the hunk below elides old lines ~19617-19629 — presumably
// the worker-join loop and mutex unlock/destroy; confirm against full file.
@@ -19630,8 +19659,9 @@ void ggml_threadpool_free(struct ggml_threadpool* threadpool) {
1963019659    ggml_cond_destroy(&threadpool->cond);
1963119660#endif // GGML_USE_OPENMP
1963219661
// Replaced macro frees with the sized ggml_aligned_free(); on macOS the size
// is required because vm_deallocate needs the region length.
19633-     GGML_ALIGNED_FREE(threadpool->workers);
19634-     GGML_ALIGNED_FREE(threadpool);
19662+     const size_t workers_size = sizeof(struct ggml_compute_state) * n_threads;
19663+     ggml_aligned_free(threadpool->workers, workers_size);
19664+     ggml_aligned_free(threadpool, sizeof(struct ggml_threadpool));
1963519665}
1963619666
1963719667#ifndef GGML_USE_OPENMP
@@ -20063,7 +20093,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
2006320093                struct ggml_cplan * cplan) {
2006420094
2006520095    struct ggml_threadpool * threadpool =
20066-         GGML_ALIGNED_MALLOC (sizeof(struct ggml_threadpool));
20096+         ggml_aligned_malloc (sizeof(struct ggml_threadpool));
2006720097    {
2006820098        threadpool->cgraph           = cgraph;
2006920099        threadpool->cplan            = cplan;
@@ -20084,7 +20114,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
2008420114
2008520115    // Allocate and init workers state
2008620116    const size_t workers_size = sizeof(struct ggml_compute_state) * tpp->n_threads;
20087-     struct ggml_compute_state * workers = GGML_ALIGNED_MALLOC (workers_size);
20117+     struct ggml_compute_state * workers = ggml_aligned_malloc (workers_size);
2008820118
2008920119    memset(workers, 0, workers_size);
2009020120    for (int j = 0; j < tpp->n_threads; j++) {
0 commit comments