@@ -335,59 +335,86 @@ struct llama_mmap::impl {
335335 // Set addr to the first mapping for node 0
336336 addr = (void *)(GGML_MMAP_VIRTUAL_MEMORY_BASE_OFFSET + base_address_offset);
337337
338+ // Calculate number of hugepages needed and total mapping size
339+ size_t hugepages_needed = (total_size + GGML_MMAP_HUGEPAGESZ - 1 ) / GGML_MMAP_HUGEPAGESZ;
340+ size_t total_mapping_size = hugepages_needed * GGML_MMAP_HUGEPAGESZ;
341+
342+ LLAMA_LOG_INFO (" Creating %zu hugepages (%zu bytes total) for %zu bytes of model data\n " ,
343+ hugepages_needed, total_mapping_size, total_size);
344+
338345 for (int node = 0 ; node < num_nodes; ++node) {
339346 numa_set_preferred (node);
340- LLAMA_LOG_INFO (" numa_set_preferred(%d)\n " , node);
347+ LLAMA_LOG_INFO (" numa_set_preferred(%d) - creating single large mapping \n " , node);
341348
342- for (i = 0 ; i * GGML_MMAP_HUGEPAGESZ < total_size; ++i) {
343- sprintf (path, " /dev/hugepages/llama-node%d-%d" , node, file_name_offset + i);
344- if (!is_new_mem[node]) {
345- is_new_mem[node] = access (path, F_OK) != 0 ;
346- }
347- int hugefd = open (path, O_CREAT | O_RDWR, 0600 );
348- if (hugefd < 0 ) {
349- // Clean up any mappings we've already created before throwing
350- for (const auto & mapping : numa_mappings) {
351- munmap (mapping.addr , mapping.size );
352- unlink (mapping.path .c_str ());
353- }
354- LLAMA_LOG_WARN (" failed to open hugepage fd %s: %d %s\n " ,
355- path, errno, strerror (errno));
356- throw std::runtime_error (format (" failed to open hugepage fd: %s" , strerror (errno)));
349+ // Create one large hugepage file for this entire NUMA node
350+ sprintf (path, " /dev/hugepages/llama-node%d-unified-%d" , node, file_name_offset);
351+ if (!is_new_mem[node]) {
352+ is_new_mem[node] = access (path, F_OK) != 0 ;
353+ }
354+
355+ int hugefd = open (path, O_CREAT | O_RDWR, 0600 );
356+ if (hugefd < 0 ) {
357+ // Clean up any mappings we've already created before throwing
358+ for (const auto & mapping : numa_mappings) {
359+ munmap (mapping.addr , mapping.size );
360+ unlink (mapping.path .c_str ());
357361 }
358- uintptr_t address = GGML_MMAP_VIRTUAL_MEMORY_BASE_OFFSET \
359- + node * GGML_MMAP_VIRTUAL_MEMORY_NUMA_INCREMENT + \
360- base_address_offset + i * GGML_MMAP_HUGEPAGESZ;
361- void * mm = mmap ((void *)address, GGML_MMAP_HUGEPAGESZ, PROT_READ | PROT_WRITE,
362- MAP_SHARED | MAP_HUGETLB | MAP_POPULATE,
363- hugefd, 0 );
362+ LLAMA_LOG_WARN (" failed to open hugepage fd %s: %d %s\n " ,
363+ path, errno, strerror (errno));
364+ throw std::runtime_error (format (" failed to open hugepage fd: %s" , strerror (errno)));
365+ }
366+
367+ // Resize the hugepage file to accommodate the entire mapping
368+ if (ftruncate (hugefd, total_mapping_size) != 0 ) {
364369 close (hugefd);
365- LLAMA_LOG_INFO (" mmap(%s) desire=%p size=%llu result=%p is_new_mem[%d]=%s\n " ,
366- path, (void *)address, GGML_MMAP_HUGEPAGESZ, mm, node, is_new_mem[node] ? " yes" : " no" );
367-
368- if (((uintptr_t )mm) != address) {
369- // If mmap failed completely, delete the file we just created
370- if (mm == MAP_FAILED) {
371- unlink (path);
372- }
373-
374- // Clean up any mappings we've already created before throwing
375- for (const auto & mapping : numa_mappings) {
376- munmap (mapping.addr , mapping.size );
377- unlink (mapping.path .c_str ());
378- }
379- LLAMA_LOG_WARN (" unable to mmap memory: %d %s\n " , errno, strerror (errno));
380- throw std::runtime_error (format (" mmap failed: %s" , strerror (errno)));
370+ unlink (path);
371+ // Clean up any mappings we've already created before throwing
372+ for (const auto & mapping : numa_mappings) {
373+ munmap (mapping.addr , mapping.size );
374+ unlink (mapping.path .c_str ());
375+ }
376+ LLAMA_LOG_WARN (" failed to resize hugepage file %s: %d %s\n " ,
377+ path, errno, strerror (errno));
378+ throw std::runtime_error (format (" ftruncate failed: %s" , strerror (errno)));
379+ }
380+
381+ // Create one large mapping for the entire model on this NUMA node
382+ uintptr_t address = GGML_MMAP_VIRTUAL_MEMORY_BASE_OFFSET +
383+ node * GGML_MMAP_VIRTUAL_MEMORY_NUMA_INCREMENT +
384+ base_address_offset;
385+
386+ void * mm = mmap ((void *)address, total_mapping_size, PROT_READ | PROT_WRITE,
387+ MAP_SHARED | MAP_HUGETLB | MAP_POPULATE, hugefd, 0 );
388+ close (hugefd);
389+
390+ LLAMA_LOG_INFO (" mmap(%s) desire=%p size=%zu result=%p is_new_mem[%d]=%s\n " ,
391+ path, (void *)address, total_mapping_size, mm, node, is_new_mem[node] ? " yes" : " no" );
392+
393+ if (((uintptr_t )mm) != address) {
394+ // If mmap failed completely, delete the file we just created
395+ if (mm == MAP_FAILED) {
396+ unlink (path);
381397 }
382398
383- // Only store valid mappings
384- numa_mappings.push_back ({mm, GGML_MMAP_HUGEPAGESZ, std::string (path)});
385-
386- if (is_new_mem[node]) {
387- memset (mm, 0 , GGML_MMAP_HUGEPAGESZ);
399+ // Clean up any mappings we've already created before throwing
400+ for (const auto & mapping : numa_mappings) {
401+ munmap (mapping.addr , mapping.size );
402+ unlink (mapping.path .c_str ());
388403 }
404+ LLAMA_LOG_WARN (" unable to mmap memory: %d %s\n " , errno, strerror (errno));
405+ throw std::runtime_error (format (" mmap failed: %s" , strerror (errno)));
406+ }
407+
408+ // Store the single large mapping
409+ numa_mappings.push_back ({mm, total_mapping_size, std::string (path)});
410+
411+ if (is_new_mem[node]) {
412+ memset (mm, 0 , total_mapping_size);
389413 }
390414 }
415+
416+ // Update global offset tracking
417+ i = hugepages_needed;
391418 base_address_offset += i * GGML_MMAP_HUGEPAGESZ;
392419 file_name_offset += i;
393420 if (is_new_mem[0 ]) {
@@ -484,59 +511,86 @@ struct llama_mmap::impl {
484511 // Set addr to the first mapping for node 0
485512 addr = (void *)(GGML_MMAP_VIRTUAL_MEMORY_BASE_OFFSET + base_address_offset);
486513
514+ // Calculate number of hugepages needed and total mapping size
515+ size_t hugepages_needed = (total_size + GGML_MMAP_HUGEPAGESZ - 1 ) / GGML_MMAP_HUGEPAGESZ;
516+ size_t total_mapping_size = hugepages_needed * GGML_MMAP_HUGEPAGESZ;
517+
518+ LLAMA_LOG_INFO (" Creating unified mapping: %zu hugepages (%zu bytes total) for %zu bytes across %zu files\n " ,
519+ hugepages_needed, total_mapping_size, total_size, files.size ());
520+
487521 for (int node = 0 ; node < num_nodes; ++node) {
488522 numa_set_preferred (node);
489- LLAMA_LOG_INFO (" numa_set_preferred(%d) for unified mapping\n " , node);
523+ LLAMA_LOG_INFO (" numa_set_preferred(%d) - creating single unified mapping\n " , node);
490524
491- for (i = 0 ; i * GGML_MMAP_HUGEPAGESZ < total_size; ++i) {
492- sprintf (path, " /dev/hugepages/llama-unified-node%d-%d" , node, file_name_offset + i);
493- if (!is_new_mem[node]) {
494- is_new_mem[node] = access (path, F_OK) != 0 ;
495- }
496- int hugefd = open (path, O_CREAT | O_RDWR, 0600 );
497- if (hugefd < 0 ) {
498- // Clean up any mappings we've already created before throwing
499- for (const auto & mapping : numa_mappings) {
500- munmap (mapping.addr , mapping.size );
501- unlink (mapping.path .c_str ());
502- }
503- LLAMA_LOG_WARN (" failed to open hugepage fd %s: %d %s\n " ,
504- path, errno, strerror (errno));
505- throw std::runtime_error (format (" failed to open hugepage fd: %s" , strerror (errno)));
525+ // Create one large hugepage file for this entire unified mapping
526+ sprintf (path, " /dev/hugepages/llama-unified-node%d-%d" , node, file_name_offset);
527+ if (!is_new_mem[node]) {
528+ is_new_mem[node] = access (path, F_OK) != 0 ;
529+ }
530+
531+ int hugefd = open (path, O_CREAT | O_RDWR, 0600 );
532+ if (hugefd < 0 ) {
533+ // Clean up any mappings we've already created before throwing
534+ for (const auto & mapping : numa_mappings) {
535+ munmap (mapping.addr , mapping.size );
536+ unlink (mapping.path .c_str ());
506537 }
507- uintptr_t address = GGML_MMAP_VIRTUAL_MEMORY_BASE_OFFSET \
508- + node * GGML_MMAP_VIRTUAL_MEMORY_NUMA_INCREMENT + \
509- base_address_offset + i * GGML_MMAP_HUGEPAGESZ;
510- void * mm = mmap ((void *)address, GGML_MMAP_HUGEPAGESZ, PROT_READ | PROT_WRITE,
511- MAP_SHARED | MAP_HUGETLB | MAP_POPULATE,
512- hugefd, 0 );
538+ LLAMA_LOG_WARN (" failed to open hugepage fd %s: %d %s\n " ,
539+ path, errno, strerror (errno));
540+ throw std::runtime_error (format (" failed to open hugepage fd: %s" , strerror (errno)));
541+ }
542+
543+ // Resize the hugepage file to accommodate the entire unified mapping
544+ if (ftruncate (hugefd, total_mapping_size) != 0 ) {
513545 close (hugefd);
514- LLAMA_LOG_INFO (" mmap(%s) desire=%p size=%llu result=%p is_new_mem[%d]=%s\n " ,
515- path, (void *)address, GGML_MMAP_HUGEPAGESZ, mm, node, is_new_mem[node] ? " yes" : " no" );
516-
517- if (((uintptr_t )mm) != address) {
518- // If mmap failed completely, delete the file we just created
519- if (mm == MAP_FAILED) {
520- unlink (path);
521- }
522-
523- // Clean up any mappings we've already created before throwing
524- for (const auto & mapping : numa_mappings) {
525- munmap (mapping.addr , mapping.size );
526- unlink (mapping.path .c_str ());
527- }
528- LLAMA_LOG_WARN (" unable to mmap memory: %d %s\n " , errno, strerror (errno));
529- throw std::runtime_error (format (" mmap failed: %s" , strerror (errno)));
546+ unlink (path);
547+ // Clean up any mappings we've already created before throwing
548+ for (const auto & mapping : numa_mappings) {
549+ munmap (mapping.addr , mapping.size );
550+ unlink (mapping.path .c_str ());
551+ }
552+ LLAMA_LOG_WARN (" failed to resize hugepage file %s: %d %s\n " ,
553+ path, errno, strerror (errno));
554+ throw std::runtime_error (format (" ftruncate failed: %s" , strerror (errno)));
555+ }
556+
557+ // Create one large mapping for the entire unified model on this NUMA node
558+ uintptr_t address = GGML_MMAP_VIRTUAL_MEMORY_BASE_OFFSET +
559+ node * GGML_MMAP_VIRTUAL_MEMORY_NUMA_INCREMENT +
560+ base_address_offset;
561+
562+ void * mm = mmap ((void *)address, total_mapping_size, PROT_READ | PROT_WRITE,
563+ MAP_SHARED | MAP_HUGETLB | MAP_POPULATE, hugefd, 0 );
564+ close (hugefd);
565+
566+ LLAMA_LOG_INFO (" mmap(%s) desire=%p size=%zu result=%p is_new_mem[%d]=%s\n " ,
567+ path, (void *)address, total_mapping_size, mm, node, is_new_mem[node] ? " yes" : " no" );
568+
569+ if (((uintptr_t )mm) != address) {
570+ // If mmap failed completely, delete the file we just created
571+ if (mm == MAP_FAILED) {
572+ unlink (path);
530573 }
531574
532- // Only store valid mappings
533- numa_mappings.push_back ({mm, GGML_MMAP_HUGEPAGESZ, std::string (path)});
534-
535- if (is_new_mem[node]) {
536- memset (mm, 0 , GGML_MMAP_HUGEPAGESZ);
575+ // Clean up any mappings we've already created before throwing
576+ for (const auto & mapping : numa_mappings) {
577+ munmap (mapping.addr , mapping.size );
578+ unlink (mapping.path .c_str ());
537579 }
580+ LLAMA_LOG_WARN (" unable to mmap memory: %d %s\n " , errno, strerror (errno));
581+ throw std::runtime_error (format (" mmap failed: %s" , strerror (errno)));
582+ }
583+
584+ // Store the single large mapping
585+ numa_mappings.push_back ({mm, total_mapping_size, std::string (path)});
586+
587+ if (is_new_mem[node]) {
588+ memset (mm, 0 , total_mapping_size);
538589 }
539590 }
591+
592+ // Update global offset tracking
593+ i = hugepages_needed;
540594 base_address_offset += i * GGML_MMAP_HUGEPAGESZ;
541595 file_name_offset += i;
542596
0 commit comments