@@ -595,7 +595,9 @@ static bool ggml_gallocr_is_own(ggml_gallocr_t galloc, struct ggml_tensor * t) {
595595}
596596
597597static  bool  ggml_gallocr_is_allocated (ggml_gallocr_t  galloc , struct  ggml_tensor  *  t ) {
598-     return  t -> data  !=  NULL  ||  ggml_gallocr_hash_get (galloc , t )-> allocated ;
598+     return  t -> data  !=  NULL  // tensor data already set externally 
599+         ||  t -> buffer  // tensor on external buffer (but not yet allocated) 
600+         ||  ggml_gallocr_is_own (galloc , t ); // tensor will be allocated by galloc 
599601}
600602
601603static  void  ggml_gallocr_allocate_node (ggml_gallocr_t  galloc , struct  ggml_tensor  *  node , int  buffer_id ) {
@@ -791,7 +793,8 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
791793    }
792794}
793795
794- bool  ggml_gallocr_reserve_n (ggml_gallocr_t  galloc , struct  ggml_cgraph  *  graph , const  int  *  node_buffer_ids , const  int  *  leaf_buffer_ids ) {
796+ static  bool  ggml_gallocr_reserve_n_impl (
797+         ggml_gallocr_t  galloc , struct  ggml_cgraph  *  graph , const  int  *  node_buffer_ids , const  int  *  leaf_buffer_ids , bool  no_alloc ) {
795798    size_t  min_hash_size  =  graph -> n_nodes  +  graph -> n_leafs ;
796799    // add 25% margin to avoid hash collisions 
797800    min_hash_size  +=  min_hash_size  / 4 ;
@@ -893,21 +896,41 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
893896        if  (realloc ) {
894897#ifndef  NDEBUG 
895898            size_t  cur_size  =  galloc -> buffers [i ] ? ggml_vbuffer_size (galloc -> buffers [i ]) : 0 ;
896-             GGML_LOG_DEBUG ("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n" , __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), cur_size  / 1024.0  / 1024.0 , new_size  / 1024.0  / 1024.0 );
899+             GGML_LOG_DEBUG ("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n" ,
900+                 __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), cur_size  / 1024.0  / 1024.0 , new_size  / 1024.0  / 1024.0 );
897901#endif 
898902
899903            ggml_vbuffer_free (galloc -> buffers [i ]);
900-             galloc -> buffers [i ] =  ggml_vbuffer_alloc (galloc -> bufts [i ], galloc -> buf_tallocs [i ], GGML_BACKEND_BUFFER_USAGE_COMPUTE );
901-             if  (galloc -> buffers [i ] ==  NULL ) {
902-                 GGML_LOG_ERROR ("%s: failed to allocate %s buffer of size %zu\n" , __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), new_size );
903-                 return  false;
904+             if  (no_alloc ) {
905+                 galloc -> buffers [i ] =  NULL ;
906+             } else  {
907+                 galloc -> buffers [i ] =  ggml_vbuffer_alloc (galloc -> bufts [i ], galloc -> buf_tallocs [i ], GGML_BACKEND_BUFFER_USAGE_COMPUTE );
908+                 if  (galloc -> buffers [i ] ==  NULL ) {
909+                     GGML_LOG_ERROR ("%s: failed to allocate %s buffer of size %zu\n" , __func__ , ggml_backend_buft_name (galloc -> bufts [i ]), new_size );
910+                     return  false;
911+                 }
904912            }
905913        }
906914    }
907915
908916    return  true;
909917}
910918
919+ void  ggml_gallocr_reserve_n_size (
920+         ggml_gallocr_t  galloc , struct  ggml_cgraph  *  graph , const  int  *  node_buffer_ids , const  int  *  leaf_buffer_ids , size_t  *  sizes ) {
921+     GGML_ASSERT (ggml_gallocr_reserve_n_impl (galloc , graph , node_buffer_ids , leaf_buffer_ids , /*no_alloc =*/  true));
922+     for  (int  i  =  0 ; i  <  galloc -> n_buffers ; i ++ ) {
923+         sizes [i ] =  0 ;
924+         for  (int  c  =  0 ; c  <  galloc -> buf_tallocs [i ]-> n_chunks ; c ++ ) {
925+             sizes [i ] +=  galloc -> buf_tallocs [i ]-> chunks [c ]-> max_size ;
926+         }
927+     }
928+ }
929+ 
930+ bool  ggml_gallocr_reserve_n (ggml_gallocr_t  galloc , struct  ggml_cgraph  *  graph , const  int  *  node_buffer_ids , const  int  *  leaf_buffer_ids ) {
931+     return  ggml_gallocr_reserve_n_impl (galloc , graph , node_buffer_ids , leaf_buffer_ids , /*no_alloc =*/  false);
932+ }
933+ 
911934bool  ggml_gallocr_reserve (ggml_gallocr_t  galloc , struct  ggml_cgraph  * graph ) {
912935    return  ggml_gallocr_reserve_n (galloc , graph , NULL , NULL );
913936}
@@ -1110,14 +1133,16 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
11101133    return  true;
11111134}
11121135
1113- ggml_backend_buffer_t  ggml_backend_alloc_ctx_tensors_from_buft (struct  ggml_context  *  ctx , ggml_backend_buffer_type_t  buft ) {
1136+ static  ggml_backend_buffer_t  ggml_backend_alloc_ctx_tensors_from_buft_impl (
1137+         struct  ggml_context  *  ctx , ggml_backend_buffer_type_t  buft , size_t  *  nbytes_total , bool  no_alloc ) {
11141138    GGML_ASSERT (ggml_get_no_alloc (ctx ) ==  true);
11151139
11161140    size_t  alignment  =  ggml_backend_buft_get_alignment (buft );
11171141    size_t  max_size  =  ggml_backend_buft_get_max_size (buft );
11181142
11191143    ggml_backend_buffer_t  *  buffers  =  NULL ;
11201144    size_t  n_buffers  =  0 ;
1145+     * nbytes_total  =  0 ;
11211146
11221147    size_t  cur_buf_size  =  0 ;
11231148    struct  ggml_tensor  *  first  =  ggml_get_first_tensor (ctx );
@@ -1129,10 +1154,11 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
11291154
11301155        if  (cur_buf_size  >  0  &&  (cur_buf_size  +  this_size ) >  max_size ) {
11311156            // allocate tensors in the current buffer 
1132-             if  (!alloc_tensor_range (ctx , first , t , buft , cur_buf_size , & buffers , & n_buffers )) {
1157+             if  (!no_alloc   &&  ! alloc_tensor_range (ctx , first , t , buft , cur_buf_size , & buffers , & n_buffers )) {
11331158                return  NULL ;
11341159            }
11351160            first  =  t ;
1161+             * nbytes_total  +=  cur_buf_size ;
11361162            cur_buf_size  =  this_size ;
11371163        } else  {
11381164            cur_buf_size  +=  this_size ;
@@ -1141,15 +1167,21 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
11411167
11421168    // allocate remaining tensors 
11431169    if  (cur_buf_size  >  0 ) {
1144-         if  (!alloc_tensor_range (ctx , first , NULL , buft , cur_buf_size , & buffers , & n_buffers )) {
1170+         * nbytes_total  +=  cur_buf_size ;
1171+         if  (!no_alloc  &&  !alloc_tensor_range (ctx , first , NULL , buft , cur_buf_size , & buffers , & n_buffers )) {
11451172            return  NULL ;
11461173        }
11471174    }
11481175
1176+     if  (no_alloc ) {
1177+         return  NULL ;
1178+     }
1179+ 
11491180    if  (n_buffers  ==  0 ) {
11501181#ifndef  NDEBUG 
11511182        GGML_LOG_DEBUG ("%s: all tensors in the context are already allocated\n" , __func__ );
11521183#endif 
1184+         GGML_ASSERT (!buffers );
11531185        return  NULL ;
11541186    }
11551187
@@ -1159,10 +1191,24 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
11591191    } else  {
11601192        buffer  =  ggml_backend_multi_buffer_alloc_buffer (buffers , n_buffers );
11611193    }
1162-     free (buffers );
1194+     if  (buffers ) {
1195+         free (buffers ); // can be NULL if context is empty or no_alloc 
1196+     }
11631197    return  buffer ;
11641198}
11651199
1200+ size_t  ggml_backend_alloc_ctx_tensors_from_buft_size (struct  ggml_context  *  ctx , ggml_backend_buffer_type_t  buft ) {
1201+     size_t  nbytes_total  =  0 ;
1202+     ggml_backend_buffer_t  buf  =  ggml_backend_alloc_ctx_tensors_from_buft_impl (ctx , buft , & nbytes_total , /*no_alloc=*/  true);
1203+     GGML_ASSERT (!buf );
1204+     return  nbytes_total ;
1205+ }
1206+ 
1207+ ggml_backend_buffer_t  ggml_backend_alloc_ctx_tensors_from_buft (struct  ggml_context  *  ctx , ggml_backend_buffer_type_t  buft ) {
1208+     size_t  nbytes_total  =  0 ;
1209+     return  ggml_backend_alloc_ctx_tensors_from_buft_impl (ctx , buft , & nbytes_total , /*no_alloc =*/  false);
1210+ }
1211+ 
11661212ggml_backend_buffer_t  ggml_backend_alloc_ctx_tensors (struct  ggml_context  *  ctx , ggml_backend_t  backend ) {
11671213    return  ggml_backend_alloc_ctx_tensors_from_buft (ctx , ggml_backend_get_default_buffer_type (backend ));
11681214}
0 commit comments