@@ -1250,22 +1250,32 @@ bool llama_model_loader::load_all_data(
12501250 if (size_done >= size_data) {
12511251 // unmap offloaded tensors and metadata
12521252 if (use_mmap) {
1253- for (uint32_t idx = 0 ; idx < mappings.size (); idx++) {
1254- const auto & mmap_used = mmaps_used.at (idx);
1255- auto & mapping = mappings.at (idx);
1256-
1257- // Skip null mappings (can happen with unified NUMA mappings)
1258- if (!mapping) {
1259- continue ;
1253+ // Check if this is a unified mapping (mappings[0] exists but all other entries are null)
1254+ bool is_unified_mapping = mappings.size () > 1 && mappings[0 ];
1255+ if (is_unified_mapping) {
1256+ for (size_t i = 1 ; i < mappings.size (); ++i) {
1257+ if (mappings[i]) {
1258+ is_unified_mapping = false ;
1259+ break ;
1260+ }
12601261 }
1261-
1262- // Check if this mapping uses NUMA mirroring
1263- // If so, skip the unmap_fragment calls as cleanup is handled in the destructor
1264- bool is_numa_mirrored = false ;
1265- #ifdef GGML_NUMA_MIRROR
1266- is_numa_mirrored = true ;
1267- #endif
1268- if (!is_numa_mirrored) {
1262+ }
1263+
1264+ if (is_unified_mapping) {
1265+ // For unified mappings, skip unmap_fragment calls entirely
1266+ // Cleanup will be handled by the unified mapping destructor
1267+ LLAMA_LOG_DEBUG (" Skipping unmap_fragment calls for unified mapping\n " );
1268+ } else {
1269+ // Original per-file mapping cleanup
1270+ for (uint32_t idx = 0 ; idx < mappings.size (); idx++) {
1271+ const auto & mmap_used = mmaps_used.at (idx);
1272+ auto & mapping = mappings.at (idx);
1273+
1274+ // Skip null mappings
1275+ if (!mapping) {
1276+ continue ;
1277+ }
1278+
12691279 mapping->unmap_fragment (0 , mmap_used.first );
12701280 if (mmap_used.second != 0 ) {
12711281 mapping->unmap_fragment (mmap_used.second , mapping->size ());
0 commit comments