@@ -885,9 +885,13 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps
885885 // Add empty entries for consistency
886886 mlock_mmaps->emplace_back (nullptr );
887887 }
888- // Store the same unified mapping for each file index
889- mappings.emplace_back (i == 0 ? std::move (unified_mapping) :
890- std::unique_ptr<llama_mmap>(nullptr ));
888+ // Store the unified mapping only in the first slot
889+ // Other slots remain nullptr - access code will check for unified mapping
890+ if (i == 0 ) {
891+ mappings.emplace_back (std::move (unified_mapping));
892+ } else {
893+ mappings.emplace_back (nullptr );
894+ }
891895 }
892896 } else {
893897#endif
@@ -917,17 +921,8 @@ void llama_model_loader::get_mapping_range(size_t * first, size_t * last, void *
917921 GGML_ASSERT (!mappings.empty ());
918922
919923#ifdef GGML_NUMA_MIRROR
920- // Check if this is a unified mapping (mapping[0] exists but others are null)
921- bool is_unified_mapping = mappings.size () > 1 && mappings[0 ];
922- // Verify it's truly unified by checking that all other mappings are null
923- if (is_unified_mapping) {
924- for (size_t i = 1 ; i < mappings.size (); ++i) {
925- if (mappings[i]) {
926- is_unified_mapping = false ;
927- break ;
928- }
929- }
930- }
924+ // Check if this is a unified mapping by seeing if mappings[1] is null but mappings[0] exists
925+ bool is_unified_mapping = mappings.size () > 1 && mappings[0 ] && !mappings[1 ];
931926
932927 if (is_unified_mapping) {
933928 // For unified mapping, use the first (and only real) mapping
@@ -986,12 +981,12 @@ void llama_model_loader::load_data_for(struct ggml_tensor * cur) const {
986981
987982 if (use_mmap) {
988983#ifdef GGML_NUMA_MIRROR
989- // Check if this is a unified mapping (mapping[0] exists but others are null)
990- bool is_unified_mapping = mappings.size () > 1 && mappings[0 ];
991- // Verify it's truly unified by checking that all other mappings are null
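984+ // Check if this is a unified mapping by testing whether every slot holds the same pointer as mappings[0]. NOTE(review): init_mappings stores nullptr in every slot after the first, and the other call sites test `mappings[0] && !mappings[1]`, so this comparison appears to reject that layout - confirm which detection is intended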
985+ bool is_unified_mapping = mappings.size () > 1 ;
992986 if (is_unified_mapping) {
987+ llama_mmap * first_ptr = mappings[0 ].get ();
993988 for (size_t i = 1 ; i < mappings.size (); ++i) {
994- if (mappings[i]) {
989+ if (mappings[i]. get () != first_ptr ) {
995990 is_unified_mapping = false ;
996991 break ;
997992 }
@@ -1152,12 +1147,34 @@ bool llama_model_loader::load_all_data(
11521147 size_t n_size = ggml_nbytes (cur);
11531148
11541149 if (use_mmap) {
1155- const auto & mapping = mappings.at (weight->idx );
1150+ // Check if this is a unified mapping and get the appropriate mapping
1151+ std::unique_ptr<llama_mmap> * mapping_ptr;
1152+ size_t file_offset = 0 ;
1153+
1154+ #ifdef GGML_NUMA_MIRROR
1155+ // Check if this is a unified mapping by seeing if mappings[1] is null but mappings[0] exists
1156+ bool is_unified_mapping = mappings.size () > 1 && mappings[0 ] && !mappings[1 ];
1157+ if (is_unified_mapping) {
1158+ // For unified mapping, always use mappings[0] and calculate the file offset
1159+ mapping_ptr = &mappings[0 ];
1160+ // Calculate offset for this file within the unified mapping
1161+ for (int i = 0 ; i < weight->idx ; ++i) {
1162+ file_offset += files[i]->size ();
1163+ }
1164+ } else {
1165+ // Standard per-file mapping
1166+ mapping_ptr = &mappings.at (weight->idx );
1167+ }
1168+ #else
1169+ mapping_ptr = &mappings.at (weight->idx );
1170+ #endif
1171+
1172+ const auto & mapping = *mapping_ptr;
11561173 ggml_backend_buffer_t buf_mmap = nullptr ;
11571174 if (bufs.count (weight->idx )) {
11581175 buf_mmap = bufs.at (weight->idx );
11591176 }
1160- uint8_t * data = (uint8_t *) mapping->addr () + weight->offs ;
1177+ uint8_t * data = (uint8_t *) mapping->addr () + file_offset + weight->offs ;
11611178
11621179 if (check_tensors) {
11631180 validation_result.emplace_back (std::async (std::launch::async, [cur, data, n_size] {
@@ -1250,16 +1267,8 @@ bool llama_model_loader::load_all_data(
12501267 if (size_done >= size_data) {
12511268 // unmap offloaded tensors and metadata
12521269 if (use_mmap) {
1253- // Check if this is a unified mapping (mapping[0] exists but others are null)
1254- bool is_unified_mapping = mappings.size () > 1 && mappings[0 ];
1255- if (is_unified_mapping) {
1256- for (size_t i = 1 ; i < mappings.size (); ++i) {
1257- if (mappings[i]) {
1258- is_unified_mapping = false ;
1259- break ;
1260- }
1261- }
1262- }
1270+ // Check if this is a unified mapping by seeing if mappings[1] is null but mappings[0] exists
1271+ bool is_unified_mapping = mappings.size () > 1 && mappings[0 ] && !mappings[1 ];
12631272
12641273 if (is_unified_mapping) {
12651274 // For unified mappings, skip unmap_fragment calls entirely
@@ -1271,11 +1280,6 @@ bool llama_model_loader::load_all_data(
12711280 const auto & mmap_used = mmaps_used.at (idx);
12721281 auto & mapping = mappings.at (idx);
12731282
1274- // Skip null mappings
1275- if (!mapping) {
1276- continue ;
1277- }
1278-
12791283 mapping->unmap_fragment (0 , mmap_used.first );
12801284 if (mmap_used.second != 0 ) {
12811285 mapping->unmap_fragment (mmap_used.second , mapping->size ());
0 commit comments