Commit 600cba7

iterate - CONT kernel (just a wrapper around CPY, as the reference is)

1 parent 1fdde7c
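
For context, "as the reference is" refers to the stock ggml CPU backend, which also implements CONT as a bare forward to the dup/CPY path. A paraphrased sketch of that reference pattern (internal function names assumed from upstream ggml-cpu, not code from this commit):

// Paraphrased sketch of the upstream ggml CPU reference (assumed internal names;
// not part of this commit): CONT simply forwards to the dup/CPY implementation.
void ggml_compute_forward_dup(const struct ggml_compute_params * params, struct ggml_tensor * dst);

static void ggml_compute_forward_cont(const struct ggml_compute_params * params, struct ggml_tensor * dst) {
    // Same data movement as CPY/dup; only the graph-level semantics differ.
    ggml_compute_forward_dup(params, dst);
}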

File tree

10 files changed: +670, -4 lines

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 2 additions & 0 deletions

@@ -43,6 +43,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
         ggml-cpu/numa-kernels/numa-kernels.h
         ggml-cpu/numa-kernels/add.c
         ggml-cpu/numa-kernels/add.h
+        ggml-cpu/numa-kernels/cont.c
+        ggml-cpu/numa-kernels/cont.h
         ggml-cpu/numa-kernels/cpy.c
         ggml-cpu/numa-kernels/cpy.h
         ggml-cpu/numa-kernels/get_rows.c
ggml/src/ggml-cpu/numa-kernels/cont.c (new file)

Lines changed: 45 additions & 0 deletions

/**
 * @file cont.c
 * @brief NUMA CONT kernel implementation as thin wrapper around CPY
 *
 * CONT operations in ggml are identical to CPY operations in terms of actual
 * computation - both copy data from source to destination. The difference is
 * only semantic in the graph representation. This implementation leverages
 * the sophisticated CPY kernel as a thin wrapper.
 *
 * @author David Sanftenberg
 * @date 2024
 */

#include "cont.h"
#include "cpy.h"  // Import CPY kernel functions
#include "numa-kernels.h"
#include "ggml-numa-openmp-coordinator.h"

/**
 * @brief NUMA CONT kernel execution function - thin wrapper around CPY
 *
 * CONT operations are semantically identical to CPY operations in terms of
 * data movement. This implementation delegates to the sophisticated CPY kernel
 * which handles all quantization types, optimization strategies, and NUMA-aware
 * execution patterns.
 *
 * @param work_context Tensor to process (cast to ggml_tensor*)
 * @param params Compute parameters with thread info
 * @return GGML_STATUS_SUCCESS on success, error code on failure
 */
enum ggml_status ggml_numa_kernel_cont_execute(void * work_context, struct ggml_compute_params * params) {
    // CONT is identical to CPY in terms of data movement
    // Delegate to the sophisticated CPY kernel implementation
    return ggml_numa_kernel_cpy_execute(work_context, params);
}

// Use the new streamlined registration system - CONT as standard operation
NUMA_KERNEL_REGISTER_METADATA(
    cont,                              // op_name
    GGML_OP_CONT,                      // ggml_op_type
    "NUMA CONT Kernel (CPY wrapper)",  // kernel_display_name
    256,                               // threshold_single_single (same as CPY)
    512,                               // threshold_single_multi (same as CPY)
    ggml_numa_kernel_cont_execute      // execute_function
)
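
For orientation, a GGML_OP_CONT node typically enters a graph when a non-contiguous view (for example, the result of a permute) has to be materialized into contiguous memory - exactly the copy this kernel delegates to CPY. An illustrative sketch using the standard ggml public API (not code from this commit):

#include "ggml.h"

// Illustrative only: build a graph fragment that yields the GGML_OP_CONT node
// handled by the kernel above.
static struct ggml_tensor * make_cont_node(struct ggml_context * ctx) {
    struct ggml_tensor * x  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 32, 8);
    struct ggml_tensor * xt = ggml_permute(ctx, x, 1, 0, 2, 3); // non-contiguous view of x
    return ggml_cont(ctx, xt);                                  // dst->op == GGML_OP_CONT
}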
ggml/src/ggml-cpu/numa-kernels/cont.h (new file)

Lines changed: 50 additions & 0 deletions

/**
 * @file cont.h
 * @brief NUMA CONT kernel interface (thin wrapper around CPY)
 *
 * CONT operations ensure tensor data is stored contiguously in memory.
 * This implementation provides a thin wrapper around the sophisticated CPY kernel.
 *
 * @author David Sanftenberg
 * @date 2024
 */

#pragma once

#include "numa-kernels.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief NUMA CONT kernel execution function (thin wrapper around CPY)
 *
 * @param work_context Tensor to process (cast to ggml_tensor*)
 * @param params Compute parameters with thread info
 * @return GGML_STATUS_SUCCESS on success, error code on failure
 */
enum ggml_status ggml_numa_kernel_cont_execute(void * work_context, struct ggml_compute_params * params);

// ============================================================================
// Registration Functions (Auto-generated by NUMA_KERNEL_REGISTER_METADATA macro)
// ============================================================================

/**
 * @brief Query function for CONT strategy selection (auto-generated)
 */
ggml_numa_execution_strategy_t ggml_numa_kernel_cont_query(const struct ggml_tensor * tensor);

/**
 * @brief Work buffer calculation function for CONT (auto-generated)
 */
size_t ggml_numa_kernel_cont_work_buffer_calc(const struct ggml_tensor * tensor, int total_numa_nodes, int total_threads);

/**
 * @brief Registration function for CONT kernel (auto-generated)
 */
ggml_numa_kernel_registration_info_t ggml_numa_kernel_cont_register(void);

#ifdef __cplusplus
}
#endif

ggml/src/ggml-cpu/numa-kernels/cpy.c

Lines changed: 1 addition & 1 deletion

@@ -386,7 +386,7 @@ enum ggml_status ggml_numa_kernel_cpy_execute(void * work_context, struct ggml_c
 
     struct ggml_tensor * src0 = dst->src[0];
     NUMA_ASSERT(src0 != NULL, "Source tensor cannot be null");
-    NUMA_ASSERT(dst->op == GGML_OP_CPY, "Expected CPY operation");
+    NUMA_ASSERT(dst->op == GGML_OP_CPY || dst->op == GGML_OP_CONT, "Expected CPY or CONT operation");
     NUMA_ASSERT(ggml_nelements(dst) == ggml_nelements(src0), "Element count must match");
 
     // Additional validation

ggml/src/ggml-cpu/numa-kernels/glu.c

Lines changed: 3 additions & 3 deletions

@@ -154,10 +154,10 @@ enum ggml_status ggml_numa_kernel_glu_execute(void * work_context, struct ggml_c
 // ============================================================================
 
 NUMA_KERNEL_REGISTER_METADATA(
-    glu,                                   // kernel name
+    glu,                              // kernel name
     GGML_OP_GLU,                      // operation type
     "NUMA GLU Kernel",                // kernel description
-    1024,                             // single_single threshold
-    2048,                             // single_multi threshold
+    4096,                             // single_single threshold
+    8192,                             // single_multi threshold
     ggml_numa_kernel_glu_execute      // execution function
 )

ggml/src/ggml-cpu/numa-kernels/numa-kernels.c

Lines changed: 2 additions & 0 deletions

@@ -28,6 +28,7 @@
 #include "permute.h"
 #include "rms_norm.h"
 #include "cpy.h"
+#include "cont.h"
 #include "get_rows.h"
 #include "../ggml-impl.h"
 #include "../ggml-vec-numa.h"
@@ -338,6 +339,7 @@ enum ggml_status ggml_numa_kernels_init(void) {
 
     // Register data movement kernels:
     NUMA_REGISTER_KERNEL(cpy);
+    NUMA_REGISTER_KERNEL(cont);  // CONT as thin wrapper around CPY
     NUMA_REGISTER_KERNEL(get_rows);
 
     // Register reduction kernels:

ggml/src/ggml-cpu/numa-kernels/numa-kernels.h

Lines changed: 58 additions & 0 deletions

@@ -404,6 +404,64 @@ bool ggml_numa_apply_kernel_force_strategy(ggml_numa_kernel_query_result_t * res
     } \
 } while(0)
 
+/**
+ * NUMA_REGISTER_SYMLINK_KERNEL - Macro to register one operation as a symlink to another
+ *
+ * This macro allows one operation to use another operation's kernel implementation.
+ * Perfect for cases like CONT → CPY where operations have identical underlying logic.
+ *
+ * @param symlink_kname - The operation to create a symlink for (e.g., cont)
+ * @param target_kname - The target operation to symlink to (e.g., cpy)
+ * @param symlink_op_type - The GGML operation type constant for the symlink (e.g., GGML_OP_CONT)
+ *
+ * Usage example:
+ *   NUMA_REGISTER_SYMLINK_KERNEL(cont, cpy, GGML_OP_CONT);
+ *
+ * This will register GGML_OP_CONT to use the CPY kernel's functions.
+ * The target kernel must already be registered.
+ */
+#define NUMA_REGISTER_SYMLINK_KERNEL(symlink_kname, target_kname, symlink_op_type) do { \
+    /* Get the target kernel's registration info */ \
+    ggml_numa_kernel_registration_info_t target_info = ggml_numa_kernel_##target_kname##_register(); \
+    ggml_numa_kernel_query_fn_t target_query_fn = ggml_numa_kernel_##target_kname##_query; \
+    ggml_numa_kernel_work_buffer_calc_fn_t target_work_buffer_fn = NULL; \
+    /* Check if target kernel provides work buffer calculation function */ \
+    if (target_info.work_buffer_calc_fn != NULL) { \
+        target_work_buffer_fn = target_info.work_buffer_calc_fn; \
+    } \
+    /* Create symlink registration info using target's functions but symlink's operation type */ \
+    ggml_numa_kernel_registration_info_t symlink_info = {0}; /* Initialize to zero */ \
+    symlink_info.op_type = symlink_op_type; /* Use the provided operation type */ \
+    symlink_info.strategy_array = target_info.strategy_array; \
+    symlink_info.work_funcs = target_info.work_funcs; \
+    symlink_info.agg_funcs = target_info.agg_funcs; \
+    symlink_info.work_buffer_calc_fn = target_info.work_buffer_calc_fn; \
+    symlink_info.supported = target_info.supported; \
+    symlink_info.is_noop = target_info.is_noop; \
+    snprintf(symlink_info.kernel_name, sizeof(symlink_info.kernel_name), "NUMA %s → %s Symlink", \
+             #symlink_kname, #target_kname); \
+    /* Register the symlink using target's functions */ \
+    enum ggml_status symlink_result = ggml_numa_register_kernel_strategy( \
+        symlink_info.op_type, &target_info.strategy_array, \
+        &target_info.work_funcs, &target_info.agg_funcs, target_query_fn, target_work_buffer_fn, \
+        target_info.supported, target_info.is_noop); \
+    if (symlink_result != GGML_STATUS_SUCCESS) { \
+        NUMA_LOG_ERROR("Failed to register " #symlink_kname " → " #target_kname " symlink"); \
+        return symlink_result; \
+    } \
+    if (target_info.supported) { \
+        NUMA_LOG_DEBUG("🔗 Symlinked %s → %s (thresholds: %zu/%zu%s%s)", \
+            #symlink_kname, #target_kname, \
+            target_info.strategy_array.thresholds[NUMA_STRATEGY_IDX_SINGLE_SINGLE], \
+            target_info.strategy_array.thresholds[NUMA_STRATEGY_IDX_SINGLE_MULTI], \
+            target_info.is_noop ? ", no-op" : "", \
+            target_work_buffer_fn ? ", work-buffer" : ""); \
+    } else { \
+        NUMA_LOG_DEBUG("🚫 Disabled symlink %s → %s (target marked as unsupported)", \
+            #symlink_kname, #target_kname); \
+    } \
+} while(0)
+
 // =============================================================================
 // SHARED KERNEL FUNCTION MACROS - Eliminate Code Duplication
 // =============================================================================
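
The macro's doc comment above gives the intended call pattern. For illustration, a minimal sketch of how an init routine could register CONT purely as a symlink to the already-registered CPY kernel (hypothetical function name; this commit's ggml_numa_kernels_init instead calls NUMA_REGISTER_KERNEL(cont) backed by the dedicated cont.c wrapper):

// Hypothetical sketch, not what this commit ships: CONT registered as a symlink
// to CPY, following the macro's documented usage. The macro returns on failure,
// so it must run inside a function returning enum ggml_status, and the target
// kernel (cpy) must be registered first.
static enum ggml_status register_data_movement_kernels_sketch(void) {
    NUMA_REGISTER_KERNEL(cpy);                              // target registered first
    NUMA_REGISTER_SYMLINK_KERNEL(cont, cpy, GGML_OP_CONT);  // CONT reuses CPY's functions
    return GGML_STATUS_SUCCESS;
}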

tests/CMakeLists.txt

Lines changed: 6 additions & 0 deletions

@@ -227,6 +227,12 @@ target_link_libraries(${LLAMA_TEST_NAME} PRIVATE ggml ggml-cpu common OpenMP::Op
 # Prioritize ggml/src/ggml-cpu/ to get the full implementation headers and add ggml/src for ggml-impl.h
 target_include_directories(${LLAMA_TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/ggml/src/ggml-cpu ${CMAKE_SOURCE_DIR}/ggml/src ${CMAKE_SOURCE_DIR}/ggml/include)
 
+# test-numa-mathematical-correctness-cont
+set(LLAMA_TEST_NAME test-numa-mathematical-correctness-cont)
+llama_build_and_test(test-numa-mathematical-correctness-cont.cpp)
+target_link_libraries(${LLAMA_TEST_NAME} PRIVATE ggml ggml-cpu common OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
+target_include_directories(${LLAMA_TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/ggml/src/ggml-cpu ${CMAKE_SOURCE_DIR}/ggml/src ${CMAKE_SOURCE_DIR}/ggml/include)
+
 # test-numa-mathematical-correctness-mul
 set(LLAMA_TEST_NAME test-numa-mathematical-correctness-mul)
 llama_build_and_test(test-numa-mathematical-correctness-mul.cpp)
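
The new test's source is not part of this diff. As a rough illustration only, this is the kind of end-to-end check a CONT correctness test might perform with the public ggml API (header names and the location of ggml_graph_compute_with_ctx are assumptions based on the current ggml layout, not taken from the commit):

#include "ggml.h"
#include "ggml-cpu.h"  // assumed to declare ggml_graph_compute_with_ctx in current ggml
#include <assert.h>
#include <stddef.h>

// Illustrative sketch - NOT the contents of test-numa-mathematical-correctness-cont.cpp.
// Make a transposed (non-contiguous) view contiguous via GGML_OP_CONT and verify the
// copied values element-wise against the source tensor.
int main(void) {
    struct ggml_init_params ip = { /*.mem_size   =*/ 16u*1024*1024,
                                   /*.mem_buffer =*/ NULL,
                                   /*.no_alloc   =*/ false };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);
    for (int i = 0; i < 8*4; ++i) ((float *) x->data)[i] = (float) i;

    struct ggml_tensor * xt = ggml_permute(ctx, x, 1, 0, 2, 3); // transposed view, not contiguous
    struct ggml_tensor * xc = ggml_cont(ctx, xt);               // GGML_OP_CONT node

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, xc);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 4);

    // After the copy, xc[j][i] must equal x[i][j].
    for (int j = 0; j < 4; ++j) {
        for (int i = 0; i < 8; ++i) {
            assert(ggml_get_f32_nd(xc, j, i, 0, 0) == ggml_get_f32_nd(x, i, j, 0, 0));
        }
    }

    ggml_free(ctx);
    return 0;
}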

tests/run-numa-tests.sh

Lines changed: 1 addition & 0 deletions

@@ -56,6 +56,7 @@ BIN_DIR="$BUILD_DIR/bin"
 # Test binaries to run (in order of complexity)
 NUMA_TESTS=(
     "test-numa-mathematical-correctness-add"
+    "test-numa-mathematical-correctness-cont"
     "test-numa-mathematical-correctness-cpy"
     "test-numa-mathematical-correctness-get_rows"
     "test-numa-mathematical-correctness-rope"
