Skip to content

Commit 53370cb

Browse files
committed
ggml: introduce GGML_NUMA_MIGRATE to optimize cross-NUMA op computation
1 parent e434e69 commit 53370cb

File tree

15 files changed

+624
-10
lines changed

15 files changed

+624
-10
lines changed

common/arg.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2280,12 +2280,18 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
22802280
"- distribute: spread execution evenly over all nodes\n"
22812281
"- isolate: only spawn threads on CPUs on the node that execution started on\n"
22822282
"- numactl: use the CPU map provided by numactl\n"
2283+
#ifdef GGML_USE_NUMA_MIGRATE
2284+
"- migrate: for affinity threads with page migration across NUMA nodes\n"
2285+
#endif
22832286
"if run without this previously, it is recommended to drop the system page cache before using this\n"
22842287
"see https://github.com/ggml-org/llama.cpp/issues/1437",
22852288
[](common_params & params, const std::string & value) {
22862289
/**/ if (value == "distribute" || value == "") { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; }
22872290
else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; }
22882291
else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
2292+
#ifdef GGML_USE_NUMA_MIGRATE
2293+
else if (value == "migrate") { params.numa = GGML_NUMA_STRATEGY_MIGRATE; }
2294+
#endif
22892295
else { throw std::invalid_argument("invalid value"); }
22902296
}
22912297
).set_env("LLAMA_ARG_NUMA"));

ggml/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,11 @@ set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
152152
"ggml: BLAS library vendor")
153153
option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT})
154154

155+
option(GGML_NUMA_MIGRATE "ggml: use NUMA_MIGRATE" OFF)
156+
set(GGML_NUMA_MIGRATE_NODES "2" CACHE STRING
157+
"ggml: the number of NUMA nodes during page migration")
158+
option(GGML_NUMA_MIGRATE_DEBUG "ggml: enable debugging of NUMA_MIGRATE" OFF)
159+
155160
option(GGML_CUDA "ggml: use CUDA" OFF)
156161
option(GGML_MUSA "ggml: use MUSA" OFF)
157162
option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)

ggml/include/ggml-backend.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -348,6 +348,9 @@ extern "C" {
348348
// CPU buffer types are always available
349349
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
350350
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
351+
#ifdef GGML_USE_NUMA_MIGRATE
352+
GGML_API size_t ggml_backend_get_page_size(void);
353+
#endif
351354

352355
#ifdef __cplusplus
353356
}

ggml/include/ggml-cpu.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,9 @@ extern "C" {
1212
struct ggml_cplan {
1313
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
1414
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
15+
#ifdef GGML_USE_NUMA_MIGRATE
16+
uint8_t * work_data_numa[GGML_NUMA_MIGRATE_NODES];
17+
#endif
1518

1619
int n_threads;
1720
struct ggml_threadpool * threadpool;
@@ -28,6 +31,9 @@ extern "C" {
2831
GGML_NUMA_STRATEGY_ISOLATE = 2,
2932
GGML_NUMA_STRATEGY_NUMACTL = 3,
3033
GGML_NUMA_STRATEGY_MIRROR = 4,
34+
#ifdef GGML_USE_NUMA_MIGRATE
35+
GGML_NUMA_STRATEGY_MIGRATE = 5,
36+
#endif
3137
GGML_NUMA_STRATEGY_COUNT
3238
};
3339

ggml/src/CMakeLists.txt

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -381,3 +381,27 @@ if (BUILD_SHARED_LIBS)
381381
target_compile_definitions(${target} PUBLIC GGML_SHARED)
382382
endforeach()
383383
endif()
384+
385+
if (GGML_NUMA_MIGRATE)
386+
find_path(NUMA_ROOT_DIR
387+
NAMES include/numa.h
388+
PATHS ENV NUMA_ROOT
389+
DOC "NUMA root directory")
390+
391+
find_library(NUMA_LIBRARY
392+
NAMES numa
393+
HINTS ${NUMA_ROOT_DIR}
394+
DOC "NUMA library")
395+
396+
if (NOT NUMA_LIBRARY)
397+
message(FATAL_ERROR "Could NOT find NUMA library.")
398+
endif()
399+
400+
if (GGML_NUMA_MIGRATE_DEBUG)
401+
target_compile_definitions(ggml-base PUBLIC GGML_USE_NUMA_MIGRATE GGML_NUMA_MIGRATE_NODES=${GGML_NUMA_MIGRATE_NODES} GGML_USE_NUMA_MIGRATE_DEBUG)
402+
else()
403+
target_compile_definitions(ggml-base PUBLIC GGML_USE_NUMA_MIGRATE GGML_NUMA_MIGRATE_NODES=${GGML_NUMA_MIGRATE_NODES})
404+
endif()
405+
406+
target_link_libraries(ggml-base PRIVATE ${NUMA_LIBRARY})
407+
endif()

ggml/src/ggml-alloc.c

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -948,6 +948,22 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
948948
ggml_backend_buffer_type_t buft, size_t size,
949949
ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
950950

951+
#ifdef GGML_USE_NUMA_MIGRATE
952+
size_t num_of_tensors = 0;
953+
for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
954+
if (t->data == NULL) {
955+
if (t->view_src == NULL) {
956+
num_of_tensors++;
957+
}
958+
}
959+
}
960+
size_t ps = ggml_backend_get_page_size();
961+
size_t original_size = size;
962+
size += ps * num_of_tensors;
963+
GGML_LOG_DEBUG("alloc buffer for NUMA page migration, num of tensors: %ld, size increased from %ld to %ld, increased %ld MiB\n",
964+
num_of_tensors, original_size, size, (size - original_size) / 1024 / 1024);
965+
#endif
966+
951967
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
952968
if (buffer == NULL) {
953969
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);

0 commit comments

Comments (0)