Skip to content

Commit 4da24f7

Browse files
committed
all tensors we load in llama-model-loader.cpp are model weights.
Make logging prettier
1 parent b8bf5fa commit 4da24f7

File tree

3 files changed

+46
-64
lines changed

3 files changed

+46
-64
lines changed

.github/instructions/numa-mirroring-implementation.md

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -308,15 +308,15 @@ In `llama-mmap.cpp`: First-touch allocation at model weight loading time
308308
// Bind current thread to the target NUMA node for first-touch
309309
struct bitmask* old_mask = numa_get_run_node_mask();
310310
if (numa_run_on_node(node) != 0) {
311-
LLAMA_LOG_DEBUG("numa_mirroring Warning: could not bind thread to NUMA node %d: %s\n", node, strerror(errno));
311+
LLAMA_LOG_DEBUG("numa_mirroring: Warning: could not bind thread to NUMA node %d: %s\n", node, strerror(errno));
312312
// Continue anyway - might still work
313313
}
314314

315315
// Use posix_memalign for SIMD alignment
316316
void* ptr = nullptr;
317317
int ret = posix_memalign(&ptr, alignment, size);
318318
if (ret != 0) {
319-
LLAMA_LOG_DEBUG("numa_mirroring posix_memalign failed for %zu bytes with alignment %zu: %s\n",
319+
LLAMA_LOG_DEBUG("numa_mirroring: posix_memalign failed for %zu bytes with alignment %zu: %s\n",
320320
size, alignment, strerror(ret));
321321
// Restore original thread binding
322322
if (old_mask) {
@@ -339,23 +339,23 @@ In `llama-mmap.cpp`: First-touch allocation at model weight loading time
339339
numa_free_nodemask(old_mask);
340340
}
341341

342-
LLAMA_LOG_DEBUG("numa_mirroring First-touch allocation: %zu bytes for node %d at %p (SIMD aligned to %zu bytes)\n",
342+
LLAMA_LOG_DEBUG("numa_mirroring: First-touch allocation: %zu bytes for node %d at %p (SIMD aligned to %zu bytes)\n",
343343
size, node, ptr, alignment);
344344
return ptr;
345345
}
346346

347347
void mmap_numa_mirror(struct llama_file * file) {
348348
int num_nodes = numa_num_configured_nodes();
349349
if (num_nodes <= 1) {
350-
throw std::runtime_error("numa_mirroring NUMA mirror mode requires multiple NUMA nodes");
350+
throw std::runtime_error("numa_mirroring: NUMA mirror mode requires multiple NUMA nodes");
351351
}
352352
353-
LLAMA_LOG_INFO("numa_mirroring NUMA mirroring enabled - allocating %.2f MB on each of %d nodes using first-touch\n",
353+
LLAMA_LOG_INFO("numa_mirroring: NUMA mirroring enabled - allocating %.2f MB on each of %d nodes using first-touch\n",
354354
file->size() / (1024.0 * 1024.0), num_nodes);
355355

356356
size_t total_size = file->size();
357357
for (int node = 0; node < num_nodes; ++node) {
358-
LLAMA_LOG_INFO("numa_mirroring Allocating on node %d \n", node);
358+
LLAMA_LOG_INFO("numa_mirroring: Allocating on node %d \n", node);
359359

360360
void* node_mem = numa_alloc_first_touch(total_size, node);
361361
if (!node_mem) {
@@ -368,24 +368,24 @@ In `llama-mmap.cpp`: First-touch allocation at model weight loading time
368368
// VERIFICATION: Check that memory was actually allocated on the expected NUMA node
369369
int actual_node = -1;
370370
if (get_mempolicy(&actual_node, NULL, 0, node_mem, MPOL_F_NODE | MPOL_F_ADDR) == 0) {
371-
LLAMA_LOG_DEBUG("numa_mirroring Memory at %p allocated on node %d (expected %d)\n",
371+
LLAMA_LOG_DEBUG("numa_mirroring: Memory at %p allocated on node %d (expected %d)\n",
372372
node_mem, actual_node, node);
373373
if (actual_node != node) {
374-
LLAMA_LOG_WARN("numa_mirroring WARNING: Memory allocated on wrong node! Expected %d, got %d\n",
374+
LLAMA_LOG_WARN("numa_mirroring: WARNING: Memory allocated on wrong node! Expected %d, got %d\n",
375375
node, actual_node);
376376
} else {
377-
LLAMA_LOG_DEBUG("numa_mirroring First-touch succeeded - memory correctly placed on node %d\n", node);
377+
LLAMA_LOG_DEBUG("numa_mirroring: First-touch succeeded - memory correctly placed on node %d\n", node);
378378
}
379379
} else {
380-
LLAMA_LOG_WARN("numa_mirroring Could not verify allocation node for %p: %s\n",
380+
LLAMA_LOG_WARN("numa_mirroring: Could not verify allocation node for %p: %s\n",
381381
node_mem, strerror(errno));
382382
}
383383

384384
file->seek(0, SEEK_SET);
385385
file->read_raw(node_mem, total_size);
386386
numa_mappings.push_back({node_mem, total_size});
387387

388-
LLAMA_LOG_DEBUG("numa_mirroring Successfully allocated and populated %.2f MB on node %d at %p\n",
388+
LLAMA_LOG_DEBUG("numa_mirroring: Successfully allocated and populated %.2f MB on node %d at %p\n",
389389
total_size / (1024.0 * 1024.0), node, node_mem);
390390
}
391391
addr = numa_mappings.empty() ? nullptr : numa_mappings[0].addr;
@@ -394,15 +394,15 @@ In `llama-mmap.cpp`: First-touch allocation at model weight loading time
394394
void mmap_numa_mirror(struct llama_file * file) {
395395
int num_nodes = numa_num_configured_nodes();
396396
if (num_nodes <= 1) {
397-
throw std::runtime_error("numa_mirroring NUMA mirror mode requires multiple NUMA nodes");
397+
throw std::runtime_error("numa_mirroring: NUMA mirror mode requires multiple NUMA nodes");
398398
}
399399
400-
LLAMA_LOG_INFO("numa_mirroring NUMA mirroring enabled - allocating %.2f MB on each of %d nodes using first-touch\n",
400+
LLAMA_LOG_INFO("numa_mirroring: NUMA mirroring enabled - allocating %.2f MB on each of %d nodes using first-touch\n",
401401
file->size() / (1024.0 * 1024.0), num_nodes);
402402

403403
size_t total_size = file->size();
404404
for (int node = 0; node < num_nodes; ++node) {
405-
LLAMA_LOG_INFO("numa_mirroring Allocating on node %d \n", node);
405+
LLAMA_LOG_INFO("numa_mirroring: Allocating on node %d \n", node);
406406

407407
void* node_mem = numa_alloc_first_touch(total_size, node);
408408
if (!node_mem) {
@@ -415,24 +415,24 @@ In `llama-mmap.cpp`: First-touch allocation at model weight loading time
415415
// VERIFICATION: Check that memory was actually allocated on the expected NUMA node
416416
int actual_node = -1;
417417
if (get_mempolicy(&actual_node, NULL, 0, node_mem, MPOL_F_NODE | MPOL_F_ADDR) == 0) {
418-
LLAMA_LOG_DEBUG("numa_mirroring Memory at %p allocated on node %d (expected %d)\n",
418+
LLAMA_LOG_DEBUG("numa_mirroring: Memory at %p allocated on node %d (expected %d)\n",
419419
node_mem, actual_node, node);
420420
if (actual_node != node) {
421-
LLAMA_LOG_WARN("numa_mirroring WARNING: Memory allocated on wrong node! Expected %d, got %d\n",
421+
LLAMA_LOG_WARN("numa_mirroring: WARNING: Memory allocated on wrong node! Expected %d, got %d\n",
422422
node, actual_node);
423423
} else {
424-
LLAMA_LOG_DEBUG("numa_mirroring First-touch succeeded - memory correctly placed on node %d\n", node);
424+
LLAMA_LOG_DEBUG("numa_mirroring: First-touch succeeded - memory correctly placed on node %d\n", node);
425425
}
426426
} else {
427-
LLAMA_LOG_WARN("numa_mirroring Could not verify allocation node for %p: %s\n",
427+
LLAMA_LOG_WARN("numa_mirroring: Could not verify allocation node for %p: %s\n",
428428
node_mem, strerror(errno));
429429
}
430430

431431
file->seek(0, SEEK_SET);
432432
file->read_raw(node_mem, total_size);
433433
numa_mappings.push_back({node_mem, total_size});
434434

435-
LLAMA_LOG_DEBUG("numa_mirroring Successfully allocated and populated %.2f MB on node %d at %p\n",
435+
LLAMA_LOG_DEBUG("numa_mirroring: Successfully allocated and populated %.2f MB on node %d at %p\n",
436436
total_size / (1024.0 * 1024.0), node, node_mem);
437437
}
438438
addr = numa_mappings.empty() ? nullptr : numa_mappings[0].addr;

src/llama-mmap.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -313,15 +313,15 @@ struct llama_mmap::impl {
313313
// Bind current thread to the target NUMA node for first-touch
314314
struct bitmask* old_mask = numa_get_run_node_mask();
315315
if (numa_run_on_node(node) != 0) {
316-
LLAMA_LOG_DEBUG("numa_mirroring Warning: could not bind thread to NUMA node %d: %s\n", node, strerror(errno));
316+
LLAMA_LOG_DEBUG("numa_mirroring: Warning: could not bind thread to NUMA node %d: %s\n", node, strerror(errno));
317317
// Continue anyway - might still work
318318
}
319319

320320
// Use posix_memalign for SIMD alignment
321321
void* ptr = nullptr;
322322
int ret = posix_memalign(&ptr, alignment, size);
323323
if (ret != 0) {
324-
LLAMA_LOG_DEBUG("numa_mirroring posix_memalign failed for %zu bytes with alignment %zu: %s\n",
324+
LLAMA_LOG_DEBUG("numa_mirroring: posix_memalign failed for %zu bytes with alignment %zu: %s\n",
325325
size, alignment, strerror(ret));
326326
// Restore original thread binding
327327
if (old_mask) {
@@ -344,23 +344,23 @@ struct llama_mmap::impl {
344344
numa_free_nodemask(old_mask);
345345
}
346346

347-
LLAMA_LOG_DEBUG("numa_mirroring First-touch allocation: %zu bytes for node %d at %p (SIMD aligned to %zu bytes)\n",
347+
LLAMA_LOG_DEBUG("numa_mirroring: First-touch allocation: %zu bytes for node %d at %p (SIMD aligned to %zu bytes)\n",
348348
size, node, ptr, alignment);
349349
return ptr;
350350
}
351351

352352
void mmap_numa_mirror(struct llama_file * file) {
353353
int num_nodes = numa_num_configured_nodes();
354354
if (num_nodes <= 1) {
355-
throw std::runtime_error("numa_mirroring NUMA mirror mode requires multiple NUMA nodes");
355+
throw std::runtime_error("numa_mirroring: NUMA mirror mode requires multiple NUMA nodes");
356356
}
357357

358-
LLAMA_LOG_INFO("numa_mirroring NUMA mirroring enabled - allocating %.2f MB on each of %d nodes using first-touch\n",
358+
LLAMA_LOG_INFO("numa_mirroring: NUMA mirroring enabled - allocating %.2f MB on each of %d nodes using first-touch\n",
359359
file->size() / (1024.0 * 1024.0), num_nodes);
360360

361361
size_t total_size = file->size();
362362
for (int node = 0; node < num_nodes; ++node) {
363-
LLAMA_LOG_INFO("numa_mirroring Allocating on node %d \n", node);
363+
LLAMA_LOG_INFO("numa_mirroring: Allocating on node %d \n", node);
364364

365365
void* node_mem = numa_alloc_first_touch(total_size, node);
366366
if (!node_mem) {
@@ -373,24 +373,24 @@ struct llama_mmap::impl {
373373
// VERIFICATION: Check that memory was actually allocated on the expected NUMA node
374374
int actual_node = -1;
375375
if (get_mempolicy(&actual_node, NULL, 0, node_mem, MPOL_F_NODE | MPOL_F_ADDR) == 0) {
376-
LLAMA_LOG_DEBUG("numa_mirroring Memory at %p allocated on node %d (expected %d)\n",
376+
LLAMA_LOG_DEBUG("numa_mirroring: Memory at %p allocated on node %d (expected %d)\n",
377377
node_mem, actual_node, node);
378378
if (actual_node != node) {
379-
LLAMA_LOG_WARN("numa_mirroring WARNING: Memory allocated on wrong node! Expected %d, got %d\n",
379+
LLAMA_LOG_WARN("numa_mirroring: WARNING: Memory allocated on wrong node! Expected %d, got %d\n",
380380
node, actual_node);
381381
} else {
382-
LLAMA_LOG_DEBUG("numa_mirroring First-touch succeeded - memory correctly placed on node %d\n", node);
382+
LLAMA_LOG_DEBUG("numa_mirroring: First-touch succeeded - memory correctly placed on node %d\n", node);
383383
}
384384
} else {
385-
LLAMA_LOG_WARN("numa_mirroring Could not verify allocation node for %p: %s\n",
385+
LLAMA_LOG_WARN("numa_mirroring: Could not verify allocation node for %p: %s\n",
386386
node_mem, strerror(errno));
387387
}
388388

389389
file->seek(0, SEEK_SET);
390390
file->read_raw(node_mem, total_size);
391391
numa_mappings.push_back({node_mem, total_size});
392392

393-
LLAMA_LOG_DEBUG("numa_mirroring Successfully allocated and populated %.2f MB on node %d at %p\n",
393+
LLAMA_LOG_DEBUG("numa_mirroring: Successfully allocated and populated %.2f MB on node %d at %p\n",
394394
total_size / (1024.0 * 1024.0), node, node_mem);
395395
}
396396
addr = numa_mappings.empty() ? nullptr : numa_mappings[0].addr;

0 commit comments

Comments
 (0)