@@ -111,70 +111,35 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
111111 endif ()
112112
113113 set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV} )
114- elseif ( APPLE )
114+ else ( )
115115 if (GGML_NATIVE)
116- set (USER_PROVIDED_MARCH FALSE )
117- foreach (flag_var IN ITEMS CMAKE_C_FLAGS CMAKE_CXX_FLAGS CMAKE_REQUIRED_FLAGS)
118- if ("${${flag_var} }" MATCHES "-march=[a-zA-Z0-9+._-]+" )
119- set (USER_PROVIDED_MARCH TRUE )
120- break ()
116+ list (APPEND ARCH_FLAGS -mcpu=native)
117+ else ()
118+ check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
119+ if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E} " STREQUAL "" )
120+ list (APPEND ARCH_FLAGS -mfp16-format=ieee)
121+ endif ()
122+ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6" )
123+ # Raspberry Pi 1, Zero
124+ list (APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
125+ endif ()
126+ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7" )
127+ if ("${CMAKE_SYSTEM_NAME} " STREQUAL "Android" )
128+ # Android armeabi-v7a
129+ list (APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
130+ else ()
131+ # Raspberry Pi 2
132+ list (APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
121133 endif ()
122- endforeach ()
123-
124- if (NOT USER_PROVIDED_MARCH)
125- set (MARCH_FLAGS "-march=armv8.2a" )
126-
127- check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
128- if (GGML_COMPILER_SUPPORT_DOTPROD)
129- set (MARCH_FLAGS "${MARCH_FLAGS} +dotprod" )
130- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_DOTPROD)
131-
132- message (STATUS "ARM feature DOTPROD enabled" )
133- endif ()
134-
135- set (TEST_I8MM_FLAGS "-march=armv8.2a+i8mm" )
136-
137- set (CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS} )
138- set (CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${TEST_I8MM_FLAGS} " )
139-
140- check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
141- if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
142- set (MARCH_FLAGS "${MARCH_FLAGS} +i8mm" )
143- list (APPEND ARCH_DEFINITIONS __ARM_FEATURE_MATMUL_INT8)
144-
145- message (STATUS "ARM feature MATMUL_INT8 enabled" )
146- endif ()
147-
148- set (CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE} )
149-
150- list (APPEND ARCH_FLAGS "${MARCH_FLAGS} " )
151- endif ()
152- endif ()
153- else ()
154- check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
155- if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E} " STREQUAL "" )
156- list (APPEND ARCH_FLAGS -mfp16-format=ieee)
157- endif ()
158- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6" )
159- # Raspberry Pi 1, Zero
160- list (APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
161- endif ()
162- if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7" )
163- if ("${CMAKE_SYSTEM_NAME} " STREQUAL "Android" )
164- # Android armeabi-v7a
165- list (APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
166- else ()
167- # Raspberry Pi 2
168- list (APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
169134 endif ()
170- endif ( )
171- if ( ${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8" )
172- # Android arm64-v8a
173- # Raspberry Pi 3, 4, Zero 2 (32-bit )
174- list ( APPEND ARCH_FLAGS -mno-unaligned-access )
175- endif ( )
176- if (GGML_SVE )
177- list ( APPEND ARCH_FLAGS -march=armv8.6-a+sve )
135+ if ( ${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8" )
136+ # Android arm64-v8a
137+ # Raspberry Pi 3, 4, Zero 2 (32-bit)
138+ list ( APPEND ARCH_FLAGS -mno-unaligned-access )
139+ endif ( )
140+ if (GGML_SVE )
141+ list ( APPEND ARCH_FLAGS -march=armv8.6-a+sve )
142+ endif ( )
178143 endif ()
179144 endif ()
180145 elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
0 commit comments