@@ -2284,12 +2284,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
22842284 max_n_tensors += n_layer*2 ; // duplicated rope freq tensors
22852285 const size_t ctx_size = ggml_tensor_overhead ()*max_n_tensors;
22862286
2287- // define a comparator for the buft -> ctx map to ensure that the order is well-defined:
2288- struct ggml_backend_buft_comparator {
2289- bool operator ()(const ggml_backend_buffer_type_t & lhs, const ggml_backend_buffer_type_t & rhs) const {
2290- return strcmp (ggml_backend_buft_name (lhs), ggml_backend_buft_name (rhs)) < 0 ;
2291- }
2292- };
22932287 std::map<ggml_backend_buffer_type_t , ggml_context_ptr, ggml_backend_buft_comparator> ctx_map;
22942288
22952289 auto ctx_for_buft = [&](ggml_backend_buffer_type_t buft) -> ggml_context * {
@@ -6354,14 +6348,20 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
63546348}
63556349
63566350bool llama_model::create_split_backend_buffers (
6357- const uint16_t idx, std::map<std::pair<ggml_backend_buffer_type_t , uint16_t >, ggml_context * > & ctx_split_map,
6351+ const uint16_t idx, std::map<std::pair<ggml_backend_buffer_type_t , uint16_t >, ggml_context_ptr > & ctx_split_map,
63586352 llama_model_loader & ml, const bool use_mmap_buffer, const bool use_mlock, const int32_t n_gpu_layers) {
63596353 // Extract contexts for the given split index from ctx_split_map into a new map
6360- std::map<ggml_backend_buffer_type_t , ggml_context *> ctx_map;
6361- for (const auto & [buft_split_idx, ctx] : ctx_split_map) {
6354+ std::map<ggml_backend_buffer_type_t , ggml_context_ptr, ggml_backend_buft_comparator> ctx_map;
6355+ for (auto it = ctx_split_map.begin (); it != ctx_split_map.end ();) {
6356+ const auto & [buft_split_idx, ctx_ptr] = *it;
63626357 const auto & [buft, split_idx] = buft_split_idx;
63636358 if (split_idx == idx) {
6364- ctx_map[buft] = ctx;
6359+ // Move the context from ctx_split_map to ctx_map
6360+ ctx_map[buft] = std::move (it->second );
6361+ // Remove from ctx_split_map since ownership has been transferred
6362+ it = ctx_split_map.erase (it);
6363+ } else {
6364+ ++it;
63656365 }
63666366 }
63676367
@@ -6370,12 +6370,15 @@ bool llama_model::create_split_backend_buffers(
63706370 constexpr bool do_print_backend_buffers_info = false ;
63716371 const bool creation_success = create_backend_buffers (split_data_size, ctx_map, ml, use_mmap_buffer, use_mlock,
63726372 n_gpu_layers, do_print_backend_buffers_info);
6373+
6374+ // Note: create_backend_buffers moves the contexts into ctxs_bufs, taking ownership
6375+ // The contexts in ctx_map are now empty after the move, which is expected
63736376
63746377 return creation_success;
63756378}
63766379
63776380bool llama_model::create_backend_buffers (std::size_t size_data,
6378- const std::map<ggml_backend_buffer_type_t , ggml_context * > & ctx_map,
6381+ std::map<ggml_backend_buffer_type_t , ggml_context_ptr, ggml_backend_buft_comparator > & ctx_map,
63796382 llama_model_loader & ml, const bool use_mmap_buffer, const bool use_mlock,
63806383 const int32_t n_gpu_layers, bool do_print_backend_buffers_info) {
63816384 // create the backend buffers
0 commit comments