@@ -308,15 +308,15 @@ In `llama-mmap.cpp`: First-touch allocation at model weight loading time
308308 // Bind current thread to the target NUMA node for first-touch
309309 struct bitmask* old_mask = numa_get_run_node_mask();
310310 if (numa_run_on_node(node) != 0) {
311- LLAMA_LOG_DEBUG ("numa_mirroring Warning: could not bind thread to NUMA node %d: %s\n", node, strerror(errno));
311+ LLAMA_LOG_DEBUG ("numa_mirroring: Warning: could not bind thread to NUMA node %d: %s\n", node, strerror(errno));
312312 // Continue anyway - might still work
313313 }
314314
315315 // Use posix_memalign for SIMD alignment
316316 void* ptr = nullptr;
317317 int ret = posix_memalign(&ptr, alignment, size);
318318 if (ret != 0) {
319- LLAMA_LOG_DEBUG("numa_mirroring posix_memalign failed for %zu bytes with alignment %zu: %s\n",
319+ LLAMA_LOG_DEBUG("numa_mirroring: posix_memalign failed for %zu bytes with alignment %zu: %s\n",
320320 size, alignment, strerror(ret));
321321 // Restore original thread binding
322322 if (old_mask) {
@@ -339,23 +339,23 @@ In `llama-mmap.cpp`: First-touch allocation at model weight loading time
339339 numa_free_nodemask(old_mask);
340340 }
341341
342- LLAMA_LOG_DEBUG("numa_mirroring First-touch allocation: %zu bytes for node %d at %p (SIMD aligned to %zu bytes)\n",
342+ LLAMA_LOG_DEBUG("numa_mirroring: First-touch allocation: %zu bytes for node %d at %p (SIMD aligned to %zu bytes)\n",
343343 size, node, ptr, alignment);
344344 return ptr;
345345 }
346346
347347 void mmap_numa_mirror(struct llama_file * file) {
348348 int num_nodes = numa_num_configured_nodes();
349349 if (num_nodes <= 1) {
350- throw std::runtime_error("numa_mirroring NUMA mirror mode requires multiple NUMA nodes");
350+ throw std::runtime_error("numa_mirroring: NUMA mirror mode requires multiple NUMA nodes");
351351 }
352352
353- LLAMA_LOG_INFO ("numa_mirroring NUMA mirroring enabled - allocating %.2f MB on each of %d nodes using first-touch\n",
353+ LLAMA_LOG_INFO ("numa_mirroring: NUMA mirroring enabled - allocating %.2f MB on each of %d nodes using first-touch\n",
354354 file->size() / (1024.0 * 1024.0), num_nodes);
355355
356356 size_t total_size = file->size();
357357 for (int node = 0; node < num_nodes; ++node) {
358- LLAMA_LOG_INFO("numa_mirroring Allocating on node %d \n", node);
358+ LLAMA_LOG_INFO("numa_mirroring: Allocating on node %d \n", node);
359359
360360 void* node_mem = numa_alloc_first_touch(total_size, node);
361361 if (!node_mem) {
@@ -368,24 +368,24 @@ In `llama-mmap.cpp`: First-touch allocation at model weight loading time
368368 // VERIFICATION: Check that memory was actually allocated on the expected NUMA node
369369 int actual_node = -1;
370370 if (get_mempolicy(&actual_node, NULL, 0, node_mem, MPOL_F_NODE | MPOL_F_ADDR) == 0) {
371- LLAMA_LOG_DEBUG("numa_mirroring Memory at %p allocated on node %d (expected %d)\n",
371+ LLAMA_LOG_DEBUG("numa_mirroring: Memory at %p allocated on node %d (expected %d)\n",
372372 node_mem, actual_node, node);
373373 if (actual_node != node) {
374- LLAMA_LOG_WARN("numa_mirroring WARNING: Memory allocated on wrong node! Expected %d, got %d\n",
374+ LLAMA_LOG_WARN("numa_mirroring: WARNING: Memory allocated on wrong node! Expected %d, got %d\n",
375375 node, actual_node);
376376 } else {
377- LLAMA_LOG_DEBUG("numa_mirroring First-touch succeeded - memory correctly placed on node %d\n", node);
377+ LLAMA_LOG_DEBUG("numa_mirroring: First-touch succeeded - memory correctly placed on node %d\n", node);
378378 }
379379 } else {
380- LLAMA_LOG_WARN("numa_mirroring Could not verify allocation node for %p: %s\n",
380+ LLAMA_LOG_WARN("numa_mirroring: Could not verify allocation node for %p: %s\n",
381381 node_mem, strerror(errno));
382382 }
383383
384384 file->seek(0, SEEK_SET);
385385 file->read_raw(node_mem, total_size);
386386 numa_mappings.push_back({node_mem, total_size});
387387
388- LLAMA_LOG_DEBUG ("numa_mirroring Successfully allocated and populated %.2f MB on node %d at %p\n",
388+ LLAMA_LOG_DEBUG ("numa_mirroring: Successfully allocated and populated %.2f MB on node %d at %p\n",
389389 total_size / (1024.0 * 1024.0), node, node_mem);
390390 }
391391 addr = numa_mappings.empty() ? nullptr : numa_mappings[ 0] .addr;
@@ -394,15 +394,15 @@ In `llama-mmap.cpp`: First-touch allocation at model weight loading time
394394 void mmap_numa_mirror(struct llama_file * file) {
395395 int num_nodes = numa_num_configured_nodes();
396396 if (num_nodes <= 1) {
397- throw std::runtime_error("numa_mirroring NUMA mirror mode requires multiple NUMA nodes");
397+ throw std::runtime_error("numa_mirroring: NUMA mirror mode requires multiple NUMA nodes");
398398 }
399399
400- LLAMA_LOG_INFO ("numa_mirroring NUMA mirroring enabled - allocating %.2f MB on each of %d nodes using first-touch\n",
400+ LLAMA_LOG_INFO ("numa_mirroring: NUMA mirroring enabled - allocating %.2f MB on each of %d nodes using first-touch\n",
401401 file->size() / (1024.0 * 1024.0), num_nodes);
402402
403403 size_t total_size = file->size();
404404 for (int node = 0; node < num_nodes; ++node) {
405- LLAMA_LOG_INFO("numa_mirroring Allocating on node %d \n", node);
405+ LLAMA_LOG_INFO("numa_mirroring: Allocating on node %d \n", node);
406406
407407 void* node_mem = numa_alloc_first_touch(total_size, node);
408408 if (!node_mem) {
@@ -415,24 +415,24 @@ In `llama-mmap.cpp`: First-touch allocation at model weight loading time
415415 // VERIFICATION: Check that memory was actually allocated on the expected NUMA node
416416 int actual_node = -1;
417417 if (get_mempolicy(&actual_node, NULL, 0, node_mem, MPOL_F_NODE | MPOL_F_ADDR) == 0) {
418- LLAMA_LOG_DEBUG("numa_mirroring Memory at %p allocated on node %d (expected %d)\n",
418+ LLAMA_LOG_DEBUG("numa_mirroring: Memory at %p allocated on node %d (expected %d)\n",
419419 node_mem, actual_node, node);
420420 if (actual_node != node) {
421- LLAMA_LOG_WARN("numa_mirroring WARNING: Memory allocated on wrong node! Expected %d, got %d\n",
421+ LLAMA_LOG_WARN("numa_mirroring: WARNING: Memory allocated on wrong node! Expected %d, got %d\n",
422422 node, actual_node);
423423 } else {
424- LLAMA_LOG_DEBUG("numa_mirroring First-touch succeeded - memory correctly placed on node %d\n", node);
424+ LLAMA_LOG_DEBUG("numa_mirroring: First-touch succeeded - memory correctly placed on node %d\n", node);
425425 }
426426 } else {
427- LLAMA_LOG_WARN("numa_mirroring Could not verify allocation node for %p: %s\n",
427+ LLAMA_LOG_WARN("numa_mirroring: Could not verify allocation node for %p: %s\n",
428428 node_mem, strerror(errno));
429429 }
430430
431431 file->seek(0, SEEK_SET);
432432 file->read_raw(node_mem, total_size);
433433 numa_mappings.push_back({node_mem, total_size});
434434
435- LLAMA_LOG_DEBUG ("numa_mirroring Successfully allocated and populated %.2f MB on node %d at %p\n",
435+ LLAMA_LOG_DEBUG ("numa_mirroring: Successfully allocated and populated %.2f MB on node %d at %p\n",
436436 total_size / (1024.0 * 1024.0), node, node_mem);
437437 }
438438 addr = numa_mappings.empty() ? nullptr : numa_mappings[ 0] .addr;
0 commit comments