@@ -2000,3 +2000,84 @@ ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size)
     GGML_ASSERT((uintptr_t)ptr % TENSOR_ALIGNMENT == 0 && "buffer pointer must be aligned");
     return ggml_backend_buffer_init(ggml_backend_cpu_buffer_from_ptr_type(), ggml_backend_cpu_buffer_from_ptr_i, ptr, size);
 }
+
+// NUMA buffer interface - similar to the CPU buffer, but with pages allocated according to a NUMA first-touch policy
+
+#include <sys/mman.h>
+
+static void ggml_backend_numa_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+    if (munmap((char *) buffer->context, buffer->size)) {
+        GGML_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
+    }
+}
+
+static void ggml_backend_numa_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
+    if (posix_madvise(buffer->context, buffer->size, POSIX_MADV_DONTNEED)) {
+        GGML_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_DONTNEED) failed: %s\n",
+            strerror(errno));
+    }
+}
+
+static const struct ggml_backend_buffer_i ggml_backend_numa_buffer_i = {
+    /* .free_buffer   = */ ggml_backend_numa_buffer_free_buffer,
+    /* .get_base      = */ ggml_backend_cpu_buffer_get_base,
+    /* .init_tensor   = */ NULL, // no initialization required
+    /* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
+    /* .set_tensor    = */ ggml_backend_cpu_buffer_set_tensor,
+    /* .get_tensor    = */ ggml_backend_cpu_buffer_get_tensor,
+    /* .cpy_tensor    = */ ggml_backend_cpu_buffer_cpy_tensor,
+    /* .clear         = */ ggml_backend_numa_buffer_clear,
+    /* .reset         = */ NULL,
+};
+
+// NUMA buffer type - similar to the CPU buffer type, but with pages allocated according to a NUMA first-touch policy
+
+static const char * ggml_backend_numa_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
+    return "NUMA";
+
+    GGML_UNUSED(buft);
+}
+
+static ggml_backend_buffer_t ggml_backend_numa_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
+    int flags = MAP_SHARED | MAP_ANONYMOUS;
+    void * data = mmap(NULL, size, PROT_READ|PROT_WRITE, flags, -1, 0);
+    if (data == MAP_FAILED) {
+        GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size);
+        return NULL;
+    }
+    if (posix_madvise(data, size, POSIX_MADV_RANDOM)) {
+        GGML_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
+            strerror(errno));
+    }
+
+    return ggml_backend_buffer_init(buft, ggml_backend_numa_buffer_i, data, size);
+}
+
+static size_t ggml_backend_numa_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
+    return TENSOR_ALIGNMENT;
+
+    GGML_UNUSED(buft);
+}
+
+static bool ggml_backend_numa_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
+    return true;
+
+    GGML_UNUSED(buft);
+}
+
+ggml_backend_buffer_type_t ggml_backend_numa_buffer_type(void) {
+    static struct ggml_backend_buffer_type ggml_backend_numa_buffer_type = {
+        /* .iface = */ {
+            /* .get_name       = */ ggml_backend_numa_buffer_type_get_name,
+            /* .alloc_buffer   = */ ggml_backend_numa_buffer_type_alloc_buffer,
+            /* .get_alignment  = */ ggml_backend_numa_buffer_type_get_alignment,
+            /* .get_max_size   = */ NULL, // defaults to SIZE_MAX
+            /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
+            /* .is_host        = */ ggml_backend_numa_buffer_type_is_host,
+        },
+        /* .device  = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
+        /* .context = */ NULL,
+    };
+
+    return &ggml_backend_numa_buffer_type;
+}
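
The comments above describe a first-touch policy: under Linux's default NUMA policy, a page of an anonymous mapping is not placed on a node when `mmap()` returns, but on the node of the CPU that first writes to it. A minimal standalone sketch of that behavior (not part of this change; it assumes CPUs 0 and 1 sit on different nodes, and uses libnuma only for the page-location query):

```c
// first_touch_demo.c - illustrates Linux first-touch page placement for an
// anonymous mapping like the one ggml_backend_numa_buffer_type_alloc_buffer creates.
// build: gcc first_touch_demo.c -lnuma
#define _GNU_SOURCE
#include <numaif.h>    // move_pages (used in query-only mode below)
#include <sched.h>     // sched_setaffinity
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static void pin_to_cpu(int cpu) {
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(cpu, &set);
    sched_setaffinity(0, sizeof(set), &set); // pin the calling thread
}

int main(void) {
    long   page = sysconf(_SC_PAGESIZE);
    size_t size = 16 * (size_t) page;

    // same kind of mapping as the NUMA buffer type: address space is
    // reserved, but no physical pages are allocated yet
    char * buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
                      MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (buf == MAP_FAILED) {
        return 1;
    }

    pin_to_cpu(0);                       // assumption: CPU 0 is on node 0
    memset(buf, 1, size / 2);            // first touch: lower half lands on node 0

    pin_to_cpu(1);                       // assumption: CPU 1 is on another node
    memset(buf + size / 2, 1, size / 2); // upper half lands on that node

    // ask the kernel where each page actually ended up
    for (size_t i = 0; i < size / (size_t) page; i++) {
        void * p      = buf + i * page;
        int    status = -1;
        move_pages(0, 1, &p, NULL, &status, 0); // nodes == NULL -> query only
        printf("page %2zu -> node %d\n", i, status);
    }

    munmap(buf, size);
    return 0;
}
```

The `POSIX_MADV_RANDOM` advice in `ggml_backend_numa_buffer_type_alloc_buffer` fits the same picture: it discourages readahead, so a page tends to be faulted in by the thread that actually touches it rather than by a neighbour's sequential access.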
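And a sketch of how a caller might exercise the new type through the public ggml-backend API (the size and the helper name here are illustrative, not taken from this change):

```c
#include "ggml-backend.h"

// hypothetical helper: allocate a 256 MiB host buffer whose pages will be
// placed per the first-touch policy as compute threads write to them
static ggml_backend_buffer_t alloc_numa_scratch(void) {
    ggml_backend_buffer_type_t buft = ggml_backend_numa_buffer_type();

    ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, 256u * 1024 * 1024);
    if (buf == NULL) {
        return NULL; // mmap failed inside ggml_backend_numa_buffer_type_alloc_buffer
    }

    // the type reports is_host == true, so tensor data in this buffer can be
    // accessed directly via ggml_backend_buffer_get_base()
    GGML_ASSERT(ggml_backend_buft_is_host(buft));

    return buf; // release with ggml_backend_buffer_free(buf) when done
}
```

One caveat worth double-checking in `ggml_backend_numa_buffer_clear`: the posix_madvise(3) man page notes that glibc implements `POSIX_MADV_DONTNEED` as a no-op (Linux's destructive `MADV_DONTNEED` does not match POSIX's advisory semantics), so actually dropping and re-zeroing the pages may require calling `madvise(buffer->context, buffer->size, MADV_DONTNEED)` directly. As written, the function also only makes sense for `value == 0`, since refaulted anonymous pages read back zero-filled.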