@@ -22,6 +22,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
2222 ggml-cpu/amx/amx.h
2323 ggml-cpu/amx/mmq.cpp
2424 ggml-cpu/amx/mmq.h
25+ ggml-cpu/tmac/tmac.cpp
26+ ggml-cpu/tmac/tmac.h
27+ ggml-cpu/tmac/lut_mul_mat.cpp
28+ ggml-cpu/tmac/lut_mul_mat.h
29+ ggml-cpu/tmac/lut_ctor.cpp
30+ ggml-cpu/tmac/lut_ctor.h
31+ ggml-cpu/tmac/tbl.cpp
32+ ggml-cpu/tmac/tbl.h
2533 ggml-cpu/ggml-cpu-impl.h
2634 ggml-cpu/common.h
2735 ggml-cpu/binary-ops.h
@@ -72,6 +80,36 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
7280 ggml-cpu/llamafile/sgemm.h)
7381 endif ()
7482
if (GGML_TMAC)
    # T-MAC support for the CPU backend target.
    #
    # Verify the toolchain first so an unsupported compiler aborts the
    # configure step before the target is modified. The compiler IDs are
    # quoted so that an empty ID (e.g. a language that is not enabled) still
    # yields a well-formed condition instead of a CMake syntax error.
    if ((NOT "${CMAKE_C_COMPILER_ID}" MATCHES "Clang") OR
        (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang"))
        message(FATAL_ERROR "Clang is required for T-MAC compilation")
    endif ()

    # GGML_USE_TMAC and the tmac include directory are PUBLIC, so they apply
    # to this target and to targets that link against it.
    target_compile_definitions(${GGML_CPU_NAME} PUBLIC GGML_USE_TMAC)
    target_include_directories(${GGML_CPU_NAME} PUBLIC ggml-cpu/tmac)

    # Report the definitions currently attached to the target.
    get_target_property(cdefs ${GGML_CPU_NAME} COMPILE_DEFINITIONS)
    message(STATUS "GGML_CPU_NAME: ${GGML_CPU_NAME} COMPILE_DEFINITIONS: ${cdefs} ")

    # Optional switch: only this target's own sources see TMAC_RECHUNK.
    if (GGML_TMAC_RECHUNK)
        target_compile_definitions(${GGML_CPU_NAME} PRIVATE TMAC_RECHUNK)
    endif ()
endif ()
112+
75113 if (GGML_CPU_HBM)
76114 find_library (memkind memkind REQUIRED)
77115
@@ -145,6 +183,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
145183 list (APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH} )
146184 endif ()
147185 endif ()
if (GGML_TMAC)
    # ARM Windows with LLVM clang GNU interface
    # We need fullfp16 for T-MAC
    # The architecture is spelled in its documented form, "armv8.2-a".
    # NOTE(review): this is appended after the -march=${GGML_CPU_ARM_ARCH}
    # entry added above when that option is set; compilers normally honor the
    # last -march on the command line, so confirm that overriding a
    # user-selected architecture is intended.
    # TODO: check_cxx_source_compiles
    list(APPEND ARCH_FLAGS -march=armv8.2-a+fp16)
endif ()
148192
149193 # show enabled features
150194 if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows" )
@@ -181,7 +225,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
181225 if (GGML_NATIVE)
182226 include (ggml-cpu/cmake/FindSIMD.cmake)
183227 endif ()
184- if (GGML_AVX512)
# Can't use GGML_AVX512 with T-MAC and Clang for MSVC:
# it fails with "error: conflicting types for '_m_prefetchw'"
230+ if (GGML_AVX512 AND (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang" ) AND (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang" ))
185231 list (APPEND ARCH_FLAGS /arch:AVX512)
186232 # /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
187233 # MSVC has no compile-time flags enabling specific
@@ -323,6 +369,19 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
323369 list (APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE} )
324370 endif ()
325371 endif ()
elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "aarch64" AND GGML_TMAC)
    # We need fullfp16 for T-MAC
    # TODO: we need to simplify this logic through check_cxx_source_compiles or Presets?
    #
    # Probe the int8 matrix-multiply intrinsic (vmmlaq_s32, the "i8mm"
    # extension). The snippet is compiled with -mcpu=native so that the result
    # reflects the CPU of the build machine; if the compiler rejects that flag
    # (for instance when cross-compiling) the probe simply fails and the
    # fp16-only baseline below is used.
    # The result is cached by check_cxx_source_compiles, so an existing build
    # directory keeps its previous answer until the cache entry is removed.
    include(CheckCXXSourceCompiles)
    set(GGML_TMAC_SAVED_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
    set(CMAKE_REQUIRED_FLAGS "-mcpu=native")
    check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
    # Restore the probe flags so later checks are not affected.
    set(CMAKE_REQUIRED_FLAGS "${GGML_TMAC_SAVED_REQUIRED_FLAGS}")

    if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
        # Device with armv8.7a+ cpu, e.g., WSL on Surface Laptop 7
        # based on arm64-windows-llvm.cmake
        # i8mm is requested through -march, which lets the compiler define
        # __ARM_FEATURE_MATMUL_INT8 itself for the sources built with
        # ARCH_FLAGS, instead of forcing that reserved macro onto every target
        # in this directory.
        list(APPEND ARCH_FLAGS -march=armv8.7-a+fp16+i8mm -fvectorize -ffp-model=fast -fno-finite-math-only)
    else ()
        # Jetson AGX Orin, Raspberry Pi 5
        list(APPEND ARCH_FLAGS -march=armv8.2-a+fp16)
    endif ()
326385 elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64" )
327386 message (STATUS "loongarch64 detected" )
328387
0 commit comments