Skip to content

Commit e1936eb

Browse files
mingfeimaggerganov
authored andcommitted
add amx kernel for gemm (llama/8998)
add intel amx isa detection add vnni kernel for gemv cases add vnni and amx kernel support for block_q8_0 code cleanup fix packing B issue enable openmp fine tune amx kernel switch to aten parallel pattern add error message for nested parallelism code cleanup add f16 support in ggml-amx add amx kernels for QK_K quant formats: Q4_K, Q5_K, Q6_K and IQ4_XS update CMakeList update README fix some compilation warning fix compiler warning when amx is not enabled minor change ggml-ci move ggml_amx_init from ggml.c to ggml-amx/mmq.cpp ggml-ci update CMakeLists with -mamx-tile, -mamx-int8 and -mamx-bf16 ggml-ci add amx as an ggml-backend update header file, the old path for immintrin.h has changed to ggml-cpu-impl.h minor change update CMakeLists.txt minor change apply weight prepacking in set_tensor method in ggml-backend fix compile error ggml-ci minor change ggml-ci update CMakeLists.txt ggml-ci add march dependency minor change ggml-ci change ggml_backend_buffer_is_host to return false for amx backend ggml-ci fix supports_op use device reg for AMX backend ggml-ci minor change ggml-ci minor change fix rebase set .buffer_from_host_ptr to be false for AMX backend
1 parent 28b044d commit e1936eb

File tree

5 files changed

+66
-1
lines changed

5 files changed

+66
-1
lines changed

ggml/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ option(GGML_AVX512 "ggml: enable AVX512" OFF)
9999
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
100100
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
101101
option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
102+
option(GGML_AMX_TILE "ggml: enable AMX-TILE" OFF)
103+
option(GGML_AMX_INT8 "ggml: enable AMX-INT8" OFF)
104+
option(GGML_AMX_BF16 "ggml: enable AMX-BF16" OFF)
102105
option(GGML_FMA "ggml: enable FMA" ${INS_ENB})
103106
if (NOT MSVC)
104107
option(GGML_F16C "ggml: enable F16C" ${INS_ENB}) # in MSVC F16C is implied with AVX2/AVX512
@@ -158,6 +161,7 @@ set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING
158161
set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)")
159162
option(GGML_OPENMP "ggml: use OpenMP" ON)
160163
option(GGML_RPC "ggml: use RPC" OFF)
164+
option(GGML_AMX "ggml: use AMX" OFF)
161165
option(GGML_SYCL "ggml: use SYCL" OFF)
162166
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
163167
set (GGML_SYCL_TARGET "INTEL" CACHE STRING

ggml/include/ggml.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2489,6 +2489,7 @@ extern "C" {
24892489
GGML_API int ggml_cpu_has_avx512_vbmi(void);
24902490
GGML_API int ggml_cpu_has_avx512_vnni(void);
24912491
GGML_API int ggml_cpu_has_avx512_bf16(void);
2492+
GGML_API int ggml_cpu_has_amx_int8 (void);
24922493
GGML_API int ggml_cpu_has_fma (void);
24932494
GGML_API int ggml_cpu_has_neon (void);
24942495
GGML_API int ggml_cpu_has_sve (void);

ggml/src/CMakeLists.txt

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,26 @@ if (GGML_LLAMAFILE)
267267
set(GGML_SOURCES_LLAMAFILE llamafile/sgemm.cpp)
268268
endif()
269269

270+
if (GGML_AMX)
271+
if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 11.0)
272+
else()
273+
set(GGML_AMX OFF)
274+
message(WARNING "AMX requires gcc version > 11.0. Turning off GGML_AMX.")
275+
endif()
276+
277+
if (GGML_AMX)
278+
message(STATUS "Using AMX")
279+
280+
list(APPEND GGML_CDEF_PUBLIC GGML_USE_AMX)
281+
282+
file(GLOB GGML_HEADERS_AMX "ggml-amx/*.h")
283+
list(APPEND GGML_HEADERS_AMX "../include/ggml-amx.h")
284+
285+
file(GLOB GGML_SOURCES_AMX "ggml-amx/*.cpp")
286+
list(APPEND GGML_SOURCES_AMX "ggml-amx.cpp")
287+
endif()
288+
endif()
289+
270290
if (GGML_CUDA)
271291
cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES
272292

@@ -1180,6 +1200,18 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
11801200
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
11811201
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
11821202
endif()
1203+
if (GGML_AMX_TILE)
1204+
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>)
1205+
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_TILE__>)
1206+
endif()
1207+
if (GGML_AMX_INT8)
1208+
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_INT8__>)
1209+
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_INT8__>)
1210+
endif()
1211+
if (GGML_AMX_BF16)
1212+
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_BF16__>)
1213+
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_BF16__>)
1214+
endif()
11831215
elseif (GGML_AVX2)
11841216
list(APPEND ARCH_FLAGS /arch:AVX2)
11851217
elseif (GGML_AVX)
@@ -1215,6 +1247,15 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
12151247
if (GGML_AVX512_BF16)
12161248
list(APPEND ARCH_FLAGS -mavx512bf16)
12171249
endif()
1250+
if (GGML_AMX_TILE)
1251+
list(APPEND ARCH_FLAGS -mamx-tile)
1252+
endif()
1253+
if (GGML_AMX_INT8)
1254+
list(APPEND ARCH_FLAGS -mamx-int8)
1255+
endif()
1256+
if (GGML_AMX_BF16)
1257+
list(APPEND ARCH_FLAGS -mamx-bf16)
1258+
endif()
12181259
endif()
12191260
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
12201261
message(STATUS "PowerPC detected")
@@ -1340,6 +1381,7 @@ add_library(ggml
13401381
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
13411382
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
13421383
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
1384+
${GGML_SOURCES_AMX} ${GGML_HEADERS_AMX}
13431385
${GGML_SOURCES_CANN} ${GGML_HEADERS_CANN}
13441386
ggml-aarch64.c ggml-aarch64.h
13451387
)

ggml/src/ggml-backend.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,6 @@ bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type
329329
if (backend->device) {
330330
return ggml_backend_dev_supports_buft(backend->device, buft);
331331
}
332-
333332
return backend->iface.supports_buft(backend, buft);
334333
}
335334

@@ -550,6 +549,14 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
550549
#include "ggml-rpc.h"
551550
#endif
552551

552+
#ifndef __AMX_INT8__
553+
#undef GGML_USE_AMX
554+
#endif
555+
556+
#ifdef GGML_USE_AMX
557+
# include "ggml-amx.h"
558+
#endif
559+
553560
struct ggml_backend_registry {
554561
std::vector<ggml_backend_reg_t> backends;
555562
std::vector<ggml_backend_dev_t> devices;
@@ -570,6 +577,9 @@ struct ggml_backend_registry {
570577
#ifdef GGML_USE_RPC
571578
register_backend(ggml_backend_rpc_reg());
572579
#endif
580+
#ifdef GGML_USE_AMX
581+
register_backend(ggml_backend_amx_reg());
582+
#endif
573583

574584
// TODO: sycl, kompute, cann
575585

ggml/src/ggml.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23219,6 +23219,14 @@ int ggml_cpu_has_avx512_bf16(void) {
2321923219
#endif
2322023220
}
2322123221

23222+
int ggml_cpu_has_amx_int8(void) {
23223+
#if defined(__AMX_INT8__)
23224+
return 1;
23225+
#else
23226+
return 0;
23227+
#endif
23228+
}
23229+
2322223230
int ggml_cpu_has_fma(void) {
2322323231
#if defined(__FMA__)
2322423232
return 1;

0 commit comments

Comments
 (0)