@@ -150,6 +150,7 @@ static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offs
 }
 #endif
 
+// returns the offset for the allocation
 static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t size, const struct ggml_tensor * tensor) {
     size = aligned_offset(NULL, size, alloc->alignment);
 
@@ -472,7 +473,9 @@ static bool ggml_gallocr_is_own(ggml_gallocr_t galloc, struct ggml_tensor * t) {
 }
 
 static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor * t) {
-    return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
+    return t->data != NULL // tensor data already set externally
+        || t->buffer       // tensor on external buffer (but may not yet be allocated)
+        || ggml_gallocr_is_own(galloc, t); // tensor will be allocated by galloc
 }
 
 static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) {
@@ -670,7 +673,8 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
     }
 }
 
-bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
+bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph,
+        const int * node_buffer_ids, const int * leaf_buffer_ids, bool dry_run) {
     size_t min_hash_size = graph->n_nodes + graph->n_leafs;
     // add 25% margin to avoid hash collisions
     min_hash_size += min_hash_size / 4;
@@ -768,7 +772,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
 #endif
 
             ggml_backend_buffer_free(galloc->buffers[i]);
-            galloc->buffers[i] = ggml_backend_buft_alloc_buffer(galloc->bufts[i], new_size);
+            galloc->buffers[i] = ggml_backend_buft_alloc_buffer(galloc->bufts[i], dry_run ? 0 : new_size);
             if (galloc->buffers[i] == NULL) {
                 GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
                 return false;
@@ -781,7 +785,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
 }
 
 bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph) {
-    return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
+    return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL, /*dry_run =*/ false);
 }
 
 static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, struct tensor_alloc * tensor_alloc) {
@@ -934,6 +938,15 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
     return ggml_backend_buffer_get_size(galloc->buffers[buffer_id]);
 }
 
+size_t ggml_gallocr_get_max_size(ggml_gallocr_t galloc, ggml_backend_dev_t dev) {
+    for (int i = 0; i < galloc->n_buffers; i++) {
+        if (ggml_backend_buft_get_device(galloc->bufts[i]) == dev) {
+            return ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]);
+        }
+    }
+    return 0;
+}
+
 // utils
 
 static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
@@ -984,14 +997,16 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
     return true;
 }
 
-ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+static ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_impl(
+        struct ggml_context * ctx, ggml_backend_buffer_type_t buft, size_t * nbytes_total, bool dry_run) {
     GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
 
     size_t alignment = ggml_backend_buft_get_alignment(buft);
     size_t max_size = ggml_backend_buft_get_max_size(buft);
 
     ggml_backend_buffer_t * buffers = NULL;
     size_t n_buffers = 0;
+    *nbytes_total = 0;
 
     size_t cur_buf_size = 0;
     struct ggml_tensor * first = ggml_get_first_tensor(ctx);
@@ -1003,10 +1018,13 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
 
         if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
             // allocate tensors in the current buffer
-            if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
-                return NULL;
+            if (!dry_run) {
+                if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
+                    return NULL;
+                }
             }
             first = t;
+            *nbytes_total += cur_buf_size;
             cur_buf_size = this_size;
         } else {
             cur_buf_size += this_size;
@@ -1015,15 +1033,23 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
 
     // allocate remaining tensors
     if (cur_buf_size > 0) {
-        if (!alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
-            return NULL;
+        *nbytes_total += cur_buf_size;
+        if (!dry_run) {
+            if (!alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
+                return NULL;
+            }
         }
     }
 
+    if (dry_run) {
+        return NULL;
+    }
+
     if (n_buffers == 0) {
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__);
 #endif
+        GGML_ASSERT(!buffers);
         return NULL;
     }
 
@@ -1033,10 +1059,24 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
     } else {
         buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
     }
-    free(buffers);
+    if (buffers) {
+        free(buffers); // can be NULL if dry_run or context is empty
+    }
     return buffer;
 }
 
+size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+    size_t nbytes_total = 0;
+    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*dry_run =*/ true);
+    GGML_ASSERT(!buf);
+    return nbytes_total;
+}
+
+ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+    size_t nbytes_total = 0;
+    return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*dry_run =*/ false);
+}
+
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
     return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
 }
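
For reference, a minimal caller-side sketch of how the new entry points combine. This is not part of the patch: print_memory_estimates is a hypothetical helper, ctx is assumed to have been created with no_alloc = true, and everything besides the two new size queries and the dry_run parameter is standard ggml API. With dry_run = true, ggml_gallocr_reserve_n runs the full planning pass but backs its buffers with zero-size allocations, so the recorded peak sizes can be queried without committing any real memory.

#include <stdio.h>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

// hypothetical helper: report memory requirements without allocating
static void print_memory_estimates(struct ggml_context * ctx, struct ggml_cgraph * graph) {
    ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();

    // total size the context tensors would need, summed but never allocated
    size_t weights_size = ggml_backend_alloc_ctx_tensors_from_buft_size(ctx, buft);

    // plan the compute graph with dry_run = true: the allocator performs the
    // full planning pass but allocates zero-size backing buffers
    ggml_gallocr_t galloc = ggml_gallocr_new(buft);
    size_t compute_size = 0;
    if (ggml_gallocr_reserve_n(galloc, graph, NULL, NULL, /*dry_run =*/ true)) {
        // peak size planned for the buffer type that lives on this device
        compute_size = ggml_gallocr_get_max_size(galloc, ggml_backend_buft_get_device(buft));
    }
    ggml_gallocr_free(galloc);

    printf("weights: %zu bytes, compute: %zu bytes\n", weights_size, compute_size);
}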