@@ -74,95 +74,79 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
7474
7575 if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
7676 CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
77- (NOT CMAKE_OSX_ARCHITECTURES AND
78- NOT CMAKE_GENERATOR_PLATFORM_LWR AND
77+ (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
7978 CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$" ))
8079
8180 message (STATUS "ARM detected" )
8281
83- if (MSVC )
84- list ( APPEND ARCH_DEFINITIONS __aarch64__) # MSVC defines _M_ARM64 instead
85- list ( APPEND ARCH_DEFINITIONS __ARM_NEON )
86- list ( APPEND ARCH_DEFINITIONS __ARM_FEATURE_FMA )
87-
88- set (CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS} )
89- string (JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2" )
82+ if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang" )
83+ message (FATAL_ERROR "MSVC is not supported for ARM, use clang" )
84+ else ( )
85+ check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E )
86+ if ( NOT " ${COMPILER_SUPPORTS_FP16_FORMAT_I3E} " STREQUAL "" )
87+ list ( APPEND ARCH_FLAGS -mfp16-format=ieee )
88+ endif ( )
9089
91- check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
92- if (GGML_COMPILER_SUPPORT_DOTPROD)
93- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
90+ if (GGML_NATIVE)
91+ list (APPEND ARCH_FLAGS -march=native)
9492
95- message (STATUS "ARM feature DOTPROD enabled" )
96- endif ()
93+ set (CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS} )
9794
98- check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
95+ # -march=native does not always enable all the features in some compilers,
96+ # so we check for them manually and enable them if available
9997
100- if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
101- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
98+ include (CheckCXXSourceRuns)
10299
103- message (STATUS "ARM feature MATMUL_INT8 enabled" )
104- endif ()
100+ set (CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS} +dotprod" )
101+ check_cxx_source_runs(
102+ "#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }"
103+ GGML_COMPILER_SUPPORT_DOTPROD)
104+ if (GGML_COMPILER_SUPPORT_DOTPROD)
105+ set (ARCH_FLAGS "${ARCH_FLAGS} +dotprod" )
106+ list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
107+ endif ()
105108
106- check_cxx_source_compiles("#include <arm_neon.h>\n int main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
107- if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
108- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
109+ set (CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS} +i8mm" )
110+ check_cxx_source_runs(
111+ "#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }"
112+ GGML_COMPILER_SUPPORT_I8MM)
113+ if (GGML_COMPILER_SUPPORT_I8MM)
114+ set (ARCH_FLAGS "${ARCH_FLAGS} +i8mm" )
115+ list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
116+ endif ()
109117
110- message (STATUS "ARM feature FP16_VECTOR_ARITHMETIC enabled" )
111- endif ()
118+ set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE} )
112119
113- set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV} )
114- else ()
115- if (GGML_NATIVE)
116- list (APPEND ARCH_FLAGS -mcpu=native)
117-
118- # Show enabled features
119- execute_process (
120- COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
121- INPUT_FILE "/dev/null"
122- OUTPUT_VARIABLE ARM_FEATURE
123- RESULT_VARIABLE ARM_FEATURE_RESULT
124- )
125- if (ARM_FEATURE_RESULT)
126- message (WARNING "Failed to get ARM features" )
127- else ()
128- foreach (feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
129- string (FIND "${ARM_FEATURE} " "__ARM_FEATURE_${feature} 1" feature_pos)
130- if (NOT ${feature_pos} EQUAL -1)
131- message (STATUS "ARM feature ${feature} enabled" )
132- endif ()
133- endforeach ()
134- endif ()
135120 else ()
136- check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
137- if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E} " STREQUAL "" )
138- list (APPEND ARCH_FLAGS -mfp16-format=ieee)
139- endif ()
140- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6" )
141- # Raspberry Pi 1, Zero
142- list (APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
121+ if (GGML_CPU_ARM_ARCH)
122+ list (APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH} )
143123 endif ()
144- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7" )
145- if ("${CMAKE_SYSTEM_NAME} " STREQUAL "Android" )
146- # Android armeabi-v7a
147- list (APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
148- else ()
149- # Raspberry Pi 2
150- list (APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
124+ endif ()
125+
126+ # show enabled features
127+ execute_process (
128+ COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
129+ INPUT_FILE "/dev/null"
130+ OUTPUT_VARIABLE ARM_FEATURE
131+ RESULT_VARIABLE ARM_FEATURE_RESULT
132+ )
133+ if (ARM_FEATURE_RESULT)
134+ message (FATAL_ERROR "Failed to get ARM features" )
135+ else ()
136+ foreach (feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
137+ string (FIND "${ARM_FEATURE} " "__ARM_FEATURE_${feature} 1" feature_pos)
138+ if (NOT ${feature_pos} EQUAL -1)
139+ message (STATUS "ARM feature ${feature} enabled" )
151140 endif ()
152- endif ()
153- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8" )
154- # Android arm64-v8a
155- # Raspberry Pi 3, 4, Zero 2 (32-bit)
156- list (APPEND ARCH_FLAGS -mno-unaligned-access)
157- endif ()
158- if (GGML_SVE)
159- list (APPEND ARCH_FLAGS -march=armv8.6-a+sve)
160- endif ()
141+ endforeach ()
161142 endif ()
162143 endif ()
163144 elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
164145 (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
165146 CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$" ))
147+
148+ message (STATUS "x86 detected" )
149+
166150 if (MSVC )
167151 # instruction set detection for MSVC only
168152 if (GGML_NATIVE)
0 commit comments