
Commit 9d66473

Merge branch 'numa-improvements-take2-iteration' into numa-improvements-take2
2 parents e60723d + 756fba6 commit 9d66473

1 file changed: 39 additions, 35 deletions

src/llama-model-loader.cpp

Lines changed: 39 additions & 35 deletions
@@ -885,9 +885,13 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps
                 // Add empty entries for consistency
                 mlock_mmaps->emplace_back(nullptr);
             }
-            // Store the same unified mapping for each file index
-            mappings.emplace_back(i == 0 ? std::move(unified_mapping) :
-                                           std::unique_ptr<llama_mmap>(nullptr));
+            // Store the unified mapping only in the first slot
+            // Other slots remain nullptr - access code will check for unified mapping
+            if (i == 0) {
+                mappings.emplace_back(std::move(unified_mapping));
+            } else {
+                mappings.emplace_back(nullptr);
+            }
         }
     } else {
 #endif
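
To make the new storage layout concrete, here is a minimal, self-contained sketch of the pattern this hunk introduces. It uses a hypothetical fake_mmap stand-in rather than the real llama_mmap type: only slot 0 of the mappings vector owns the unified mapping, while the remaining per-file slots are kept as nullptr placeholders so indices still line up with the list of model files.

// Illustrative sketch only, with a hypothetical fake_mmap stand-in for llama_mmap:
// slot 0 owns the single unified mapping; the other per-file slots stay nullptr.
#include <cstddef>
#include <cstdio>
#include <memory>
#include <vector>

struct fake_mmap {
    size_t size;
};

int main() {
    const size_t n_files = 3;
    auto unified_mapping = std::make_unique<fake_mmap>(fake_mmap{4096});

    std::vector<std::unique_ptr<fake_mmap>> mappings;
    for (size_t i = 0; i < n_files; ++i) {
        if (i == 0) {
            mappings.emplace_back(std::move(unified_mapping));
        } else {
            mappings.emplace_back(nullptr);
        }
    }

    for (size_t i = 0; i < mappings.size(); ++i) {
        std::printf("slot %zu: %s\n", i, mappings[i] ? "unified mapping" : "nullptr");
    }
    return 0;
}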
@@ -917,17 +921,8 @@ void llama_model_loader::get_mapping_range(size_t * first, size_t * last, void *
     GGML_ASSERT(!mappings.empty());

 #ifdef GGML_NUMA_MIRROR
-    // Check if this is a unified mapping (mapping[0] exists but others are null)
-    bool is_unified_mapping = mappings.size() > 1 && mappings[0];
-    // Verify it's truly unified by checking that all other mappings are null
-    if (is_unified_mapping) {
-        for (size_t i = 1; i < mappings.size(); ++i) {
-            if (mappings[i]) {
-                is_unified_mapping = false;
-                break;
-            }
-        }
-    }
+    // Check if this is a unified mapping by seeing if mappings[1] is null but mappings[0] exists
+    bool is_unified_mapping = mappings.size() > 1 && mappings[0] && !mappings[1];

     if (is_unified_mapping) {
         // For unified mapping, use the first (and only real) mapping
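
The simplified check relies on the storage layout above: with the unified scheme, slot 0 is non-null and slot 1 is a deliberate nullptr placeholder, so inspecting the first two entries is enough. A small illustrative sketch of the same predicate, factored into an is_unified helper that does not exist in llama.cpp, again with a hypothetical fake_mmap stand-in:

// Illustrative sketch only: the simplified detection used by this hunk.
#include <cstddef>
#include <cstdio>
#include <memory>
#include <vector>

struct fake_mmap {
    size_t size;
};

static bool is_unified(const std::vector<std::unique_ptr<fake_mmap>> & mappings) {
    // Unified layout: slot 0 holds the mapping, slot 1 is a nullptr placeholder.
    return mappings.size() > 1 && mappings[0] && !mappings[1];
}

int main() {
    std::vector<std::unique_ptr<fake_mmap>> unified;
    unified.emplace_back(new fake_mmap{4096});
    unified.emplace_back(nullptr);
    unified.emplace_back(nullptr);

    std::vector<std::unique_ptr<fake_mmap>> per_file;
    per_file.emplace_back(new fake_mmap{1024});
    per_file.emplace_back(new fake_mmap{2048});

    std::printf("unified layout:  %d\n", is_unified(unified));  // prints 1
    std::printf("per-file layout: %d\n", is_unified(per_file)); // prints 0
    return 0;
}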
@@ -986,12 +981,12 @@ void llama_model_loader::load_data_for(struct ggml_tensor * cur) const {

     if (use_mmap) {
 #ifdef GGML_NUMA_MIRROR
-        // Check if this is a unified mapping (mapping[0] exists but others are null)
-        bool is_unified_mapping = mappings.size() > 1 && mappings[0];
-        // Verify it's truly unified by checking that all other mappings are null
+        // Check if this is a unified mapping by comparing if all mappings point to the same object
+        bool is_unified_mapping = mappings.size() > 1;
         if (is_unified_mapping) {
+            llama_mmap * first_ptr = mappings[0].get();
             for (size_t i = 1; i < mappings.size(); ++i) {
-                if (mappings[i]) {
+                if (mappings[i].get() != first_ptr) {
                     is_unified_mapping = false;
                     break;
                 }
@@ -1152,12 +1147,34 @@ bool llama_model_loader::load_all_data(
             size_t n_size = ggml_nbytes(cur);

             if (use_mmap) {
-                const auto & mapping = mappings.at(weight->idx);
+                // Check if this is a unified mapping and get the appropriate mapping
+                std::unique_ptr<llama_mmap> * mapping_ptr;
+                size_t file_offset = 0;
+
+#ifdef GGML_NUMA_MIRROR
+                // Check if this is a unified mapping by seeing if mappings[1] is null but mappings[0] exists
+                bool is_unified_mapping = mappings.size() > 1 && mappings[0] && !mappings[1];
+                if (is_unified_mapping) {
+                    // For unified mapping, always use mappings[0] and calculate the file offset
+                    mapping_ptr = &mappings[0];
+                    // Calculate offset for this file within the unified mapping
+                    for (int i = 0; i < weight->idx; ++i) {
+                        file_offset += files[i]->size();
+                    }
+                } else {
+                    // Standard per-file mapping
+                    mapping_ptr = &mappings.at(weight->idx);
+                }
+#else
+                mapping_ptr = &mappings.at(weight->idx);
+#endif
+
+                const auto & mapping = *mapping_ptr;
                 ggml_backend_buffer_t buf_mmap = nullptr;
                 if (bufs.count(weight->idx)) {
                     buf_mmap = bufs.at(weight->idx);
                 }
-                uint8_t * data = (uint8_t *) mapping->addr() + weight->offs;
+                uint8_t * data = (uint8_t *) mapping->addr() + file_offset + weight->offs;

                 if (check_tensors) {
                     validation_result.emplace_back(std::async(std::launch::async, [cur, data, n_size] {
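
The offset arithmetic in this hunk follows from the files being laid out back to back inside the single unified mapping: the start of file weight->idx is the sum of the sizes of all earlier files, and a tensor then lives at addr() + file_offset + weight->offs. A minimal sketch with assumed file sizes (the values are illustrative, not taken from any real model):

// Illustrative sketch only, with hypothetical file sizes: how this hunk derives
// a tensor's address inside the single unified mapping.
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    const std::vector<size_t> file_sizes = {1000, 2000, 3000}; // hypothetical sizes
    const int    weight_idx  = 2;    // tensor stored in the third file
    const size_t weight_offs = 128;  // tensor offset within that file

    // Files are placed back to back, so sum the sizes of the preceding files.
    size_t file_offset = 0;
    for (int i = 0; i < weight_idx; ++i) {
        file_offset += file_sizes[i];
    }

    // With a real mapping this corresponds to:
    //   uint8_t * data = (uint8_t *) mapping->addr() + file_offset + weight->offs;
    std::printf("offset into unified mapping: %zu\n", file_offset + weight_offs);
    return 0;
}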
@@ -1250,16 +1267,8 @@ bool llama_model_loader::load_all_data(
     if (size_done >= size_data) {
         // unmap offloaded tensors and metadata
         if (use_mmap) {
-            // Check if this is a unified mapping (mapping[0] exists but others are null)
-            bool is_unified_mapping = mappings.size() > 1 && mappings[0];
-            if (is_unified_mapping) {
-                for (size_t i = 1; i < mappings.size(); ++i) {
-                    if (mappings[i]) {
-                        is_unified_mapping = false;
-                        break;
-                    }
-                }
-            }
+            // Check if this is a unified mapping by seeing if mappings[1] is null but mappings[0] exists
+            bool is_unified_mapping = mappings.size() > 1 && mappings[0] && !mappings[1];

             if (is_unified_mapping) {
                 // For unified mappings, skip unmap_fragment calls entirely
@@ -1271,11 +1280,6 @@ bool llama_model_loader::load_all_data(
                 const auto & mmap_used = mmaps_used.at(idx);
                 auto & mapping = mappings.at(idx);

-                // Skip null mappings
-                if (!mapping) {
-                    continue;
-                }
-
                 mapping->unmap_fragment(0, mmap_used.first);
                 if (mmap_used.second != 0) {
                     mapping->unmap_fragment(mmap_used.second, mapping->size());
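
The dropped null check appears to be unnecessary now that unified mappings skip this loop entirely; in the standard per-file path each slot holds a real mapping. The surrounding trimming logic is unchanged, and the sketch below (hypothetical fake_mmap stand-in for llama_mmap) shows that pattern: the loader tracks the byte range of each mapping that was actually used and unmaps everything before and after it.

// Illustrative sketch only: trim the unused head and tail of a mapping.
#include <cstddef>
#include <cstdio>
#include <utility>

struct fake_mmap {
    size_t len;
    size_t size() const { return len; }
    void unmap_fragment(size_t first, size_t last) {
        std::printf("unmap bytes [%zu, %zu)\n", first, last);
    }
};

int main() {
    fake_mmap mapping{4096};
    std::pair<size_t, size_t> mmap_used{512, 3072}; // range of bytes actually touched

    mapping.unmap_fragment(0, mmap_used.first);                    // release the unused head
    if (mmap_used.second != 0) {
        mapping.unmap_fragment(mmap_used.second, mapping.size());  // release the unused tail
    }
    return 0;
}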
