@@ -595,7 +595,9 @@ static bool ggml_gallocr_is_own(ggml_gallocr_t galloc, struct ggml_tensor * t) {
595595}
596596
597597static  bool  ggml_gallocr_is_allocated (ggml_gallocr_t  galloc , struct  ggml_tensor  *  t ) {
598-     return  t -> data  !=  NULL  ||  ggml_gallocr_hash_get (galloc , t )-> allocated ;
598+     return  t -> data  !=  NULL  // tensor data already set externally 
599+         ||  t -> buffer  // tensor on external buffer (but not yet allocated) 
600+         ||  ggml_gallocr_is_own (galloc , t ); // tensor will be allocated by galloc 
599601}
600602
601603// free the extra space at the end if the new tensor is smaller 
@@ -813,7 +815,8 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
813815    }
814816}
815817
816- bool  ggml_gallocr_reserve_n (ggml_gallocr_t  galloc , struct  ggml_cgraph  *  graph , const  int  *  node_buffer_ids , const  int  *  leaf_buffer_ids ) {
818+ static  bool  ggml_gallocr_reserve_n_impl (
819+         ggml_gallocr_t  galloc , struct  ggml_cgraph  *  graph , const  int  *  node_buffer_ids , const  int  *  leaf_buffer_ids , bool  no_alloc ) {
817820    size_t  min_hash_size  =  graph -> n_nodes  +  graph -> n_leafs ;
818821    // add 25% margin to avoid hash collisions 
819822    min_hash_size  +=  min_hash_size  / 4 ;
@@ -915,21 +918,41 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
915918        if  (realloc ) {
916919#ifndef  NDEBUG 
917920            size_t  cur_size  =  galloc -> buffers [i ] ? ggml_vbuffer_size (galloc -> buffers [i ]) : 0 ;
918-             GGML_LOG_DEBUG ("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n" , __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), cur_size  / 1024.0  / 1024.0 , new_size  / 1024.0  / 1024.0 );
921+             GGML_LOG_DEBUG ("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n" ,
922+                 __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), cur_size  / 1024.0  / 1024.0 , new_size  / 1024.0  / 1024.0 );
919923#endif 
920924
921925            ggml_vbuffer_free (galloc -> buffers [i ]);
922-             galloc -> buffers [i ] =  ggml_vbuffer_alloc (galloc -> bufts [i ], galloc -> buf_tallocs [i ], GGML_BACKEND_BUFFER_USAGE_COMPUTE );
923-             if  (galloc -> buffers [i ] ==  NULL ) {
924-                 GGML_LOG_ERROR ("%s: failed to allocate %s buffer of size %zu\n" , __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), new_size );
925-                 return  false;
926+             if  (no_alloc ) {
927+                 galloc -> buffers [i ] =  NULL ;
928+             } else  {
929+                 galloc -> buffers [i ] =  ggml_vbuffer_alloc (galloc -> bufts [i ], galloc -> buf_tallocs [i ], GGML_BACKEND_BUFFER_USAGE_COMPUTE );
930+                 if  (galloc -> buffers [i ] ==  NULL ) {
931+                     GGML_LOG_ERROR ("%s: failed to allocate %s buffer of size %zu\n" , __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), new_size );
932+                     return  false;
933+                 }
926934            }
927935        }
928936    }
929937
930938    return  true;
931939}
932940
941+ void  ggml_gallocr_reserve_n_size (
942+         ggml_gallocr_t  galloc , struct  ggml_cgraph  *  graph , const  int  *  node_buffer_ids , const  int  *  leaf_buffer_ids , size_t  *  sizes ) {
943+     GGML_ASSERT (ggml_gallocr_reserve_n_impl (galloc , graph , node_buffer_ids , leaf_buffer_ids , /*no_alloc =*/  true));
944+     for  (int  i  =  0 ; i  <  galloc -> n_buffers ; i ++ ) {
945+         sizes [i ] =  0 ;
946+         for  (int  c  =  0 ; c  <  galloc -> buf_tallocs [i ]-> n_chunks ; c ++ ) {
947+             sizes [i ] +=  galloc -> buf_tallocs [i ]-> chunks [c ]-> max_size ;
948+         }
949+     }
950+ }
951+ 
952+ bool  ggml_gallocr_reserve_n (ggml_gallocr_t  galloc , struct  ggml_cgraph  *  graph , const  int  *  node_buffer_ids , const  int  *  leaf_buffer_ids ) {
953+     return  ggml_gallocr_reserve_n_impl (galloc , graph , node_buffer_ids , leaf_buffer_ids , /*no_alloc =*/  false);
954+ }
955+ 
933956bool  ggml_gallocr_reserve (ggml_gallocr_t  galloc , struct  ggml_cgraph  * graph ) {
934957    return  ggml_gallocr_reserve_n (galloc , graph , NULL , NULL );
935958}
@@ -1132,14 +1155,16 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
11321155    return  true;
11331156}
11341157
1135- ggml_backend_buffer_t  ggml_backend_alloc_ctx_tensors_from_buft (struct  ggml_context  *  ctx , ggml_backend_buffer_type_t  buft ) {
1158+ static  ggml_backend_buffer_t  ggml_backend_alloc_ctx_tensors_from_buft_impl (
1159+         struct  ggml_context  *  ctx , ggml_backend_buffer_type_t  buft , size_t  *  nbytes_total , bool  no_alloc ) {
11361160    GGML_ASSERT (ggml_get_no_alloc (ctx ) ==  true);
11371161
11381162    size_t  alignment  =  ggml_backend_buft_get_alignment (buft );
11391163    size_t  max_size  =  ggml_backend_buft_get_max_size (buft );
11401164
11411165    ggml_backend_buffer_t  *  buffers  =  NULL ;
11421166    size_t  n_buffers  =  0 ;
1167+     * nbytes_total  =  0 ;
11431168
11441169    size_t  cur_buf_size  =  0 ;
11451170    struct  ggml_tensor  *  first  =  ggml_get_first_tensor (ctx );
@@ -1151,10 +1176,11 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
11511176
11521177        if  (cur_buf_size  >  0  &&  (cur_buf_size  +  this_size ) >  max_size ) {
11531178            // allocate tensors in the current buffer 
1154-             if  (!alloc_tensor_range (ctx , first , t , buft , cur_buf_size , & buffers , & n_buffers )) {
1179+             if  (!no_alloc   &&  ! alloc_tensor_range (ctx , first , t , buft , cur_buf_size , & buffers , & n_buffers )) {
11551180                return  NULL ;
11561181            }
11571182            first  =  t ;
1183+             * nbytes_total  +=  cur_buf_size ;
11581184            cur_buf_size  =  this_size ;
11591185        } else  {
11601186            cur_buf_size  +=  this_size ;
@@ -1163,15 +1189,21 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
11631189
11641190    // allocate remaining tensors 
11651191    if  (cur_buf_size  >  0 ) {
1166-         if  (!alloc_tensor_range (ctx , first , NULL , buft , cur_buf_size , & buffers , & n_buffers )) {
1192+         * nbytes_total  +=  cur_buf_size ;
1193+         if  (!no_alloc  &&  !alloc_tensor_range (ctx , first , NULL , buft , cur_buf_size , & buffers , & n_buffers )) {
11671194            return  NULL ;
11681195        }
11691196    }
11701197
1198+     if  (no_alloc ) {
1199+         return  NULL ;
1200+     }
1201+ 
11711202    if  (n_buffers  ==  0 ) {
11721203#ifndef  NDEBUG 
11731204        GGML_LOG_DEBUG ("%s: all tensors in the context are already allocated\n" , __func__ );
11741205#endif 
1206+         GGML_ASSERT (!buffers );
11751207        return  NULL ;
11761208    }
11771209
@@ -1181,10 +1213,24 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
11811213    } else  {
11821214        buffer  =  ggml_backend_multi_buffer_alloc_buffer (buffers , n_buffers );
11831215    }
1184-     free (buffers );
1216+     if  (buffers ) {
1217+         free (buffers ); // can be NULL if context is empty or no_alloc 
1218+     }
11851219    return  buffer ;
11861220}
11871221
1222+ size_t  ggml_backend_alloc_ctx_tensors_from_buft_size (struct  ggml_context  *  ctx , ggml_backend_buffer_type_t  buft ) {
1223+     size_t  nbytes_total  =  0 ;
1224+     ggml_backend_buffer_t  buf  =  ggml_backend_alloc_ctx_tensors_from_buft_impl (ctx , buft , & nbytes_total , /*no_alloc=*/  true);
1225+     GGML_ASSERT (!buf );
1226+     return  nbytes_total ;
1227+ }
1228+ 
1229+ ggml_backend_buffer_t  ggml_backend_alloc_ctx_tensors_from_buft (struct  ggml_context  *  ctx , ggml_backend_buffer_type_t  buft ) {
1230+     size_t  nbytes_total  =  0 ;
1231+     return  ggml_backend_alloc_ctx_tensors_from_buft_impl (ctx , buft , & nbytes_total , /*no_alloc =*/  false);
1232+ }
1233+ 
11881234ggml_backend_buffer_t  ggml_backend_alloc_ctx_tensors (struct  ggml_context  *  ctx , ggml_backend_t  backend ) {
11891235    return  ggml_backend_alloc_ctx_tensors_from_buft (ctx , ggml_backend_get_default_buffer_type (backend ));
11901236}
0 commit comments