@@ -602,7 +602,9 @@ static bool ggml_gallocr_is_own(ggml_gallocr_t galloc, struct ggml_tensor * t) {
 }
 
 static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor * t) {
-    return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
+    return t->data != NULL                 // tensor data already set externally
+        || t->buffer                       // tensor on external buffer (but not yet allocated)
+        || ggml_gallocr_is_own(galloc, t); // tensor will be allocated by galloc
 }
 
 // free the extra space at the end if the new tensor is smaller
@@ -820,7 +822,8 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
     }
 }
 
-bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+static bool ggml_gallocr_reserve_n_impl(
+        ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids, bool no_alloc) {
     size_t min_hash_size = graph->n_nodes + graph->n_leafs;
     // add 25% margin to avoid hash collisions
     min_hash_size += min_hash_size / 4;
@@ -922,21 +925,41 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
         if (realloc) {
 #ifndef NDEBUG
             size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
-            GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
+            GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n",
+                __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
 #endif
 
             ggml_vbuffer_free(galloc->buffers[i]);
-            galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
-            if (galloc->buffers[i] == NULL) {
-                GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
-                return false;
+            if (no_alloc) {
+                galloc->buffers[i] = NULL;
+            } else {
+                galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
+                if (galloc->buffers[i] == NULL) {
+                    GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
+                    return false;
+                }
             }
         }
     }
 
     return true;
 }
 
+void ggml_gallocr_reserve_n_size(
+        ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids, size_t * sizes) {
+    GGML_ASSERT(ggml_gallocr_reserve_n_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids, /*no_alloc =*/ true));
+    for (int i = 0; i < galloc->n_buffers; i++) {
+        sizes[i] = 0;
+        for (int c = 0; c < galloc->buf_tallocs[i]->n_chunks; c++) {
+            sizes[i] += galloc->buf_tallocs[i]->chunks[c]->max_size;
+        }
+    }
+}
+
+bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+    return ggml_gallocr_reserve_n_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids, /*no_alloc =*/ false);
+}
+
 bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph) {
     return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
 }
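A minimal caller-side sketch (not part of this commit) of how the new size-query entry point can be used: measure the compute buffers for a graph without touching backend memory, then reserve for real only if it fits. The single-buffer-type setup, the `budget` parameter, and the `reserve_within_budget` helper are assumptions for illustration.

```c
#include <stdio.h>
#include "ggml.h"
#include "ggml-alloc.h"

// Sketch: galloc was created with ggml_gallocr_new(), i.e. a single buffer
// type, so `sizes` needs one slot. NULL buffer ids match ggml_gallocr_reserve().
static bool reserve_within_budget(ggml_gallocr_t galloc, struct ggml_cgraph * graph, size_t budget) {
    size_t sizes[1] = { 0 };
    ggml_gallocr_reserve_n_size(galloc, graph, /*node_buffer_ids =*/ NULL, /*leaf_buffer_ids =*/ NULL, sizes);
    printf("graph needs %.2f MiB of compute memory\n", sizes[0] / 1024.0 / 1024.0);
    if (sizes[0] > budget) {
        return false; // too large: caller can pick another backend or split the graph
    }
    return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL); // allocates this time
}
```

After the measuring call, `galloc->buffers` are left NULL, so the follow-up `ggml_gallocr_reserve_n` takes the realloc path and performs the actual allocation.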
@@ -1139,14 +1162,16 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
     return true;
 }
 
-ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+static ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_impl(
+        struct ggml_context * ctx, ggml_backend_buffer_type_t buft, size_t * nbytes_total, bool no_alloc) {
     GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
 
     size_t alignment = ggml_backend_buft_get_alignment(buft);
     size_t max_size = ggml_backend_buft_get_max_size(buft);
 
     ggml_backend_buffer_t * buffers = NULL;
     size_t n_buffers = 0;
+    *nbytes_total = 0;
 
     size_t cur_buf_size = 0;
     struct ggml_tensor * first = ggml_get_first_tensor(ctx);
@@ -1158,10 +1183,11 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
 
         if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
             // allocate tensors in the current buffer
-            if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
+            if (!no_alloc && !alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
                 return NULL;
             }
             first = t;
+            *nbytes_total += cur_buf_size;
             cur_buf_size = this_size;
         } else {
             cur_buf_size += this_size;
@@ -1170,15 +1196,21 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
 
     // allocate remaining tensors
     if (cur_buf_size > 0) {
-        if (!alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
+        *nbytes_total += cur_buf_size;
+        if (!no_alloc && !alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
             return NULL;
         }
     }
 
+    if (no_alloc) {
+        return NULL;
+    }
+
     if (n_buffers == 0) {
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__);
 #endif
+        GGML_ASSERT(!buffers);
         return NULL;
     }
 
@@ -1188,10 +1220,24 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
     } else {
         buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
     }
-    free(buffers);
+    if (buffers) {
+        free(buffers); // can be NULL if context is empty or no_alloc
+    }
     return buffer;
 }
 
+size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+    size_t nbytes_total = 0;
+    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc =*/ true);
+    GGML_ASSERT(!buf);
+    return nbytes_total;
+}
+
+ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+    size_t nbytes_total = 0;
+    return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc =*/ false);
+}
+
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
     return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
 }
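And a hedged usage sketch for the context-level counterpart: query the total bytes a weight context would need for a given buffer type, and only allocate when it fits. The `budget` argument and the fallback policy are illustrative assumptions, not part of this commit.

```c
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

// Sketch: ctx must have been created with no_alloc == true and its tensors
// already defined, matching the GGML_ASSERT in the implementation above.
static ggml_backend_buffer_t alloc_weights_if_they_fit(struct ggml_context * ctx,
        ggml_backend_buffer_type_t buft, size_t budget) {
    // accounts for per-tensor alignment and max_size splitting, but allocates nothing
    size_t need = ggml_backend_alloc_ctx_tensors_from_buft_size(ctx, buft);
    if (need > budget) {
        return NULL; // caller falls back, e.g. to a host buffer type
    }
    return ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
}
```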