 #include "ggml-alloc.h"
+#include "ggml-backend.h"
 #include "ggml.h"
 #include <assert.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
-#ifdef __has_include
-    #if __has_include(<unistd.h>)
-        #include <unistd.h>
-        #if defined(_POSIX_MAPPED_FILES)
-            #include <sys/types.h>
-            #include <sys/mman.h>
-        #endif
-    #endif
-#endif
-
-#if defined(_WIN32)
-    #define WIN32_LEAN_AND_MEAN
-    #ifndef NOMINMAX
-        #define NOMINMAX
-    #endif
-    #include <windows.h>
-    #include <memoryapi.h>
-#endif
-
 
 #define UNUSED(x) (void)(x)
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -80,8 +62,9 @@ struct free_block {
 #define MAX_FREE_BLOCKS 256
 
 struct ggml_allocr {
+    struct ggml_backend_buffer * buffer;
+    bool buffer_owned;
     void * data;
-    size_t size;
     size_t alignment;
     int n_free_blocks;
     struct free_block free_blocks[MAX_FREE_BLOCKS];
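
After this hunk the allocator owns (or borrows) a ggml_backend_buffer instead of tracking a raw size. Assembled from the context and added lines above, the struct now begins as follows (remaining fields elided; the comments are added here for illustration, not part of the change):

    struct ggml_allocr {
        struct ggml_backend_buffer * buffer; // backing buffer that all allocations come from
        bool buffer_owned;                   // whether ggml_allocr_free() should also free the buffer
        void * data;                         // base address of the buffer
        size_t alignment;                    // required tensor address alignment
        int n_free_blocks;                   // first-fit free list
        struct free_block free_blocks[MAX_FREE_BLOCKS];
        // ... hash_table, max_size, measure, parse_seq, parse_seq_len follow
    };
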
@@ -119,28 +102,20 @@ static void remove_allocated_tensor(struct ggml_allocr * alloc, struct ggml_tens
 }
 #endif
 
-static size_t ggml_allocr_get_alloc_size(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
-    return ggml_nbytes(tensor);
-
-    UNUSED(alloc);
-}
-
 // check if a tensor is allocated by this buffer
 static bool ggml_allocr_is_own(struct ggml_allocr * alloc, const struct ggml_tensor * tensor) {
-    void * ptr = tensor->data;
-    return ptr >= alloc->data && (char *)ptr < (char *)alloc->data + alloc->max_size;
+    return tensor->buffer == alloc->buffer;
 }
 
 static bool ggml_is_view(struct ggml_tensor * t) {
     return t->view_src != NULL;
 }
 
 void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
-#ifdef GGML_ALLOCATOR_DEBUG
     GGML_ASSERT(!ggml_is_view(tensor)); // views generally get data pointer from one of their sources
     GGML_ASSERT(tensor->data == NULL); // avoid allocating tensor which already has memory allocated
-#endif
-    size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
+
+    size_t size = ggml_backend_buffer_get_alloc_size(alloc->buffer, tensor);
     size = aligned_offset(NULL, size, alloc->alignment);
 
     AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size);
@@ -188,6 +163,8 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
 
     tensor->data = addr;
     AT_PRINTF("%s: allocated data at %p\n", __func__, tensor->data);
+    tensor->buffer = alloc->buffer;
+    ggml_backend_buffer_init_tensor(alloc->buffer, tensor);
 
 #ifdef GGML_ALLOCATOR_DEBUG
     add_allocated_tensor(alloc, tensor);
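
Note that ggml_backend_buffer_get_alloc_size() replaces the old ggml_allocr_get_alloc_size(), which always returned ggml_nbytes(tensor); routing the query through the buffer lets a backend request more than the raw tensor size, e.g. for padding. A hypothetical backend hook might look like this (the function and its padding rule are illustrative only, not part of this change):

    // hypothetical: a backend that pads every allocation to a 256-byte multiple
    static size_t example_get_alloc_size(struct ggml_backend_buffer * buffer, struct ggml_tensor * tensor) {
        UNUSED(buffer);
        return (ggml_nbytes(tensor) + 255) & ~(size_t)255;
    }
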
@@ -208,19 +185,21 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
 
 // this is a very naive implementation, but for our case the number of free blocks should be very small
 static void ggml_allocr_free_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
-    void * ptr = tensor->data;
-
     if (ggml_allocr_is_own(alloc, tensor) == false) {
         // the tensor was not allocated in this buffer
         // this can happen because the graph allocator will try to free weights and other tensors from different buffers
         // the easiest way to deal with this is just to ignore it
+        AT_PRINTF("ignoring %s (their buffer: %p, our buffer: %p)\n", tensor->name, (void *)tensor->buffer, (void *)alloc->buffer);
         return;
     }
 
-    size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
+    void * ptr = tensor->data;
+
+    size_t size = ggml_backend_buffer_get_alloc_size(alloc->buffer, tensor);
     size = aligned_offset(NULL, size, alloc->alignment);
     AT_PRINTF("%s: freeing %s at %p (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, ptr, size, alloc->n_free_blocks);
-    AT_PRINTF("%s: alloc->data = %p alloc->data+alloc->size = %p alloc->data+alloc->max_size = %p\n", __func__, alloc->data, (char*)alloc->data + alloc->size, (char*)alloc->data + alloc->max_size);
+
+    ggml_backend_buffer_free_tensor(alloc->buffer, tensor);
 
 #ifdef GGML_ALLOCATOR_DEBUG
     remove_allocated_tensor(alloc, tensor);
@@ -285,15 +264,18 @@ void ggml_allocr_reset(struct ggml_allocr * alloc) {
     alloc->n_free_blocks = 1;
     size_t align_offset = aligned_offset(alloc->data, 0, alloc->alignment);
     alloc->free_blocks[0].addr = (char *)alloc->data + align_offset;
-    alloc->free_blocks[0].size = alloc->size - align_offset;
+    alloc->free_blocks[0].size = ggml_backend_buffer_get_size(alloc->buffer) - align_offset;
 }
 
 struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment) {
-    struct ggml_allocr * alloc = (struct ggml_allocr *)malloc(sizeof(struct ggml_allocr) /* + n_free_blocks * sizeof(struct free_block) */);
+    struct ggml_backend_buffer * buffer = ggml_backend_cpu_buffer_from_ptr(NULL, data, size);
+
+    struct ggml_allocr * alloc = (struct ggml_allocr *)malloc(sizeof(struct ggml_allocr));
 
     *alloc = (struct ggml_allocr){
-        /*.data          = */ data,
-        /*.size          = */ size,
+        /*.buffer        = */ buffer,
+        /*.buffer_owned  = */ true,
+        /*.base          = */ ggml_backend_buffer_get_base(buffer),
        /*.alignment     = */ alignment,
         /*.n_free_blocks = */ 0,
         /*.free_blocks   = */ {{0}},
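
The public signature of ggml_allocr_new() is unchanged; the caller-provided memory is simply wrapped in a CPU backend buffer, with buffer_owned = true so that ggml_allocr_free() releases the wrapper. A minimal usage sketch (size and alignment are arbitrary; judging by the from_ptr name, the wrapper does not take ownership of the memory itself, so freeing it remains the caller's job):

    size_t buf_size = 16u * 1024 * 1024;
    void * buf_data = malloc(buf_size);
    struct ggml_allocr * allocr = ggml_allocr_new(buf_data, buf_size, /*alignment=*/32);
    // ... allocate tensors ...
    ggml_allocr_free(allocr); // frees the wrapper buffer, not buf_data
    free(buf_data);
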
@@ -312,74 +294,26 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment)
     return alloc;
 }
 
-// OS specific functions to allocate and free uncommitted virtual memory
-static void * alloc_vmem(size_t size) {
-#if defined(_WIN32)
-    return VirtualAlloc(NULL, size, MEM_RESERVE, PAGE_NOACCESS);
-#elif defined(_POSIX_MAPPED_FILES)
-    void * ptr = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
-    if (ptr == MAP_FAILED) {
-        return NULL;
-    }
-    return ptr;
-#else
-    // use a fixed address for other platforms
-    uintptr_t base_addr = (uintptr_t)-size - 0x100;
-    return (void *)base_addr;
-#endif
-}
-
-static void free_vmem(void * base_addr, size_t size) {
-#if defined(_WIN32)
-    VirtualFree(base_addr, 0, MEM_RELEASE);
-    UNUSED(size);
-#elif defined(_POSIX_MAPPED_FILES)
-    munmap(base_addr, size);
-#else
-    // nothing to do
-    UNUSED(base_addr);
-    UNUSED(size);
-#endif
-}
-
-// allocate uncommitted virtual memory to measure the size of the graph
-static void alloc_measure_vmem(void ** base_addr, size_t * size) {
-    // 128GB for 64-bit, 1GB for 32-bit
-    *size = sizeof(void *) == 4 ? 1ULL<<30 : 1ULL<<37;
-    do {
-        *base_addr = alloc_vmem(*size);
-        if (*base_addr != NULL) {
-            AT_PRINTF("allocated %.2f GB of virtual memory for measure buffer at %p\n", *size / 1024.0 / 1024.0 / 1024.0, *base_addr);
-            return;
-        }
-        // try again with half the size
-        *size /= 2;
-    } while (*size > 0);
-
-    GGML_ASSERT(!"failed to allocate virtual memory for measure buffer");
-}
-
-static void free_measure_vmem(void * base_addr, size_t size) {
-    free_vmem(base_addr, size);
-}
-
 struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
-    struct ggml_allocr * alloc = (struct ggml_allocr *)malloc(sizeof(struct ggml_allocr) /* + n_free_blocks * sizeof(struct free_block) */);
+    struct ggml_allocr * alloc = ggml_allocr_new((void *)0x1000, (size_t)-0x1001, alignment);
+    alloc->measure = true;
 
-    void * base_addr;
-    size_t size;
+    return alloc;
+}
 
-    alloc_measure_vmem(&base_addr, &size);
+struct ggml_allocr * ggml_allocr_new_from_buffer(struct ggml_backend_buffer * buffer) {
+    struct ggml_allocr * alloc = (struct ggml_allocr *)malloc(sizeof(struct ggml_allocr));
 
     *alloc = (struct ggml_allocr){
-        /*.data          = */ base_addr,
-        /*.size          = */ size,
-        /*.alignment     = */ alignment,
+        /*.buffer        = */ buffer,
+        /*.buffer_owned  = */ false,
+        /*.base          = */ ggml_backend_buffer_get_base(buffer),
+        /*.alignment     = */ ggml_backend_buffer_get_alignment(buffer),
         /*.n_free_blocks = */ 0,
         /*.free_blocks   = */ {{0}},
         /*.hash_table    = */ {{0}},
         /*.max_size      = */ 0,
-        /*.measure       = */ true,
+        /*.measure       = */ false,
         /*.parse_seq     = */ {0},
         /*.parse_seq_len = */ 0,
 #ifdef GGML_ALLOCATOR_DEBUG
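
The measure allocator no longer reserves uncommitted virtual memory (the alloc_vmem()/free_vmem() machinery above is deleted wholesale); it now pretends to own an address range that is never dereferenced. The two magic constants are chosen so the range spans nearly the whole address space without overflowing:

    // base = 0x1000 keeps NULL and the first page out of the range;
    // size = (size_t)-0x1001 == SIZE_MAX - 0x1000, so base + size == SIZE_MAX
    // and pointer arithmetic inside the allocator never wraps around
    assert((size_t)0x1000 + (size_t)-0x1001 == SIZE_MAX);
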
@@ -393,8 +327,8 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
 }
 
 void ggml_allocr_free(struct ggml_allocr * alloc) {
-    if (alloc->measure) {
-        free_measure_vmem(alloc->data, alloc->size);
+    if (alloc->buffer_owned) {
+        ggml_backend_buffer_free(alloc->buffer);
     }
     free(alloc);
 }
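
Ownership is now explicit: only an allocator that created its buffer (ggml_allocr_new(), ggml_allocr_new_measure()) frees it. With the new ggml_allocr_new_from_buffer() the caller keeps ownership, roughly like this (the backend handle and buffer-creation call are assumptions from the ggml-backend API, not shown in this diff):

    struct ggml_backend_buffer * buffer = ggml_backend_alloc_buffer(backend, buf_size);
    struct ggml_allocr * allocr = ggml_allocr_new_from_buffer(buffer);
    // ... allocate tensors ...
    ggml_allocr_free(allocr);         // buffer_owned == false: the buffer survives
    ggml_backend_buffer_free(buffer); // the caller frees it explicitly
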
@@ -437,20 +371,30 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
         case GGML_OP_ROPE:
         case GGML_OP_RMS_NORM:
         case GGML_OP_SOFT_MAX:
-        case GGML_OP_CONT:
             return true;
 
         default:
             return false;
     }
 }
 
+static void init_view(struct ggml_allocr * alloc, struct ggml_tensor * view) {
+    assert(view->view_src != NULL && view->view_src->data != NULL);
+    view->backend = view->view_src->backend;
+    view->buffer  = view->view_src->buffer;
+    view->data    = (char *)view->view_src->data + view->view_offs;
+
+    // FIXME: the view should be initialized by the owning buffer, but currently this breaks the CUDA backend
+    // due to the ggml_tensor_extra_gpu ring buffer overwriting the KV cache extras
+    assert(ggml_allocr_is_measure(alloc) || view->buffer->backend == alloc->buffer->backend);
+    ggml_backend_buffer_init_tensor(alloc->buffer, view);
+}
+
 static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node) {
     struct hash_node * ht = alloc->hash_table;
     if (node->data == NULL) {
         if (ggml_is_view(node)) {
-            assert(node->view_src->data != NULL);
-            node->data = (char *)node->view_src->data + node->view_offs;
+            init_view(alloc, node);
         } else {
             // see if we can reuse a parent's buffer (inplace)
             if (ggml_op_can_inplace(node->op)) {
@@ -478,13 +422,17 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
                                 // adding a view_src pointer to the tensor would solve this and simplify the code dealing with views
                                 // for now, we only reuse the parent's data if the offset is zero (view_src->data == parent->data)
                                 AT_PRINTF("reusing view parent %s (%s) for %s\n", parent->name, view_src->name, node->name);
-                                node->data = parent->data;
+                                node->view_src = view_src;
+                                view_src_hn->n_views += 1;
+                                init_view(alloc, node);
                                 return;
                             }
                         }
                         else {
                             AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
-                            node->data = parent->data;
+                            node->view_src = parent;
+                            p_hn->n_views += 1;
+                            init_view(alloc, node);
                             return;
                         }
                     }
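
In both branches, in-place reuse is now recorded as a genuine view instead of silently aliasing node->data. In effect:

    // before: node->data = parent->data;  // untracked alias; the free logic cannot see it
    // after:  node->view_src = parent;    // tracked alias
    //         p_hn->n_views += 1;         // parent is kept alive while the alias is in use
    //         init_view(alloc, node);     // node inherits backend, buffer and data pointer
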
@@ -495,7 +443,7 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
     }
 }
 
-static size_t ggml_allocr_alloc_graph_tensors_n(
+size_t ggml_allocr_alloc_graph_n(
     struct ggml_allocr * alloc,
     struct ggml_cgraph ** graphs, int n_graphs,
     struct ggml_tensor *** inputs, struct ggml_tensor *** outputs) {
@@ -513,6 +461,10 @@ static size_t ggml_allocr_alloc_graph_tensors_n(
             if (ggml_is_view(node)) {
                 struct ggml_tensor * view_src = node->view_src;
                 hash_get(ht, view_src)->n_views += 1;
+                if (node->buffer == NULL && node->data != NULL) {
+                    // view of a pre-allocated tensor, didn't call init_view() yet
+                    init_view(alloc, node);
+                }
             }
 
             for (int j = 0; j < GGML_MAX_SRC; j++) {
@@ -521,6 +473,9 @@ static size_t ggml_allocr_alloc_graph_tensors_n(
                     break;
                 }
                 hash_get(ht, parent)->n_children += 1;
+                if (ggml_is_view(parent) && parent->buffer == NULL && parent->data != NULL) {
+                    init_view(alloc, parent);
+                }
             }
         }
     }
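
These two checks handle views whose source was allocated outside the graph allocator: such a view already has a data pointer (inherited from its source at creation time) but no buffer, so init_view() is applied the first time the graph walk encounters it, whether as a node or as a parent. A sketch of how such a tensor arises (ctx and the pre-allocated weight w are assumptions for illustration):

    // w->data was set by some other buffer, e.g. a weight loaded from a file
    struct ggml_tensor * v = ggml_view_1d(ctx, w, ggml_nelements(w), 0);
    // here v->data != NULL but v->buffer == NULL, so the checks above
    // call init_view(alloc, v) during graph allocation
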
@@ -631,7 +586,7 @@ static size_t ggml_allocr_alloc_graph_tensors_n(
 }
 
 size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph) {
-    return ggml_allocr_alloc_graph_tensors_n(alloc, &graph, 1, NULL, NULL);
+    return ggml_allocr_alloc_graph_n(alloc, &graph, 1, NULL, NULL);
 }
 
 size_t ggml_allocr_max_size(struct ggml_allocr * alloc) {
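
The typical two-pass pattern is unaffected by the refactor: measure first, then allocate from a real buffer of the measured size. A sketch, assuming a caller-provided build_graph() that reconstructs the same graph each time:

    // pass 1: measure -- tensors get fake addresses, only max_size is tracked
    struct ggml_allocr * measure = ggml_allocr_new_measure(/*alignment=*/32);
    size_t mem_size = ggml_allocr_alloc_graph(measure, build_graph());
    ggml_allocr_free(measure);

    // pass 2: allocate for real from a buffer of the measured size
    void * buf = malloc(mem_size);
    struct ggml_allocr * allocr = ggml_allocr_new(buf, mem_size, /*alignment=*/32);
    ggml_allocr_alloc_graph(allocr, build_graph());
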