@@ -88,32 +88,45 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
8888 endif ()
8989
9090 if (GGML_NATIVE)
91- list (APPEND ARCH_FLAGS -mcpu=native)
92-
93- set (CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS} )
94-
9591 # -mcpu=native does not always enable all the features in some compilers,
9692 # so we check for them manually and enable them if available
9793
# Resolve the concrete CPU name to use in place of -mcpu=native.
#
# The C compiler is run as a preprocessor (-E) in verbose mode (-v) on empty
# input with -mcpu=native; its stderr is captured in ARM_MCPU and searched for
# the first "-mcpu=<value>" token. Presumably the verbose output echoes the
# expanded option - this depends on the compiler driver, hence the fallback.
#
# Result: ARM_MCPU_FLAG holds either the token that was found or the literal
# "-mcpu=native" when the probe failed or printed no such token.

# Start from a known-empty value so a failed probe cannot reuse a stale one.
set(ARM_MCPU_FLAG "")

execute_process(
    COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
    INPUT_FILE "/dev/null"
    OUTPUT_QUIET
    ERROR_VARIABLE ARM_MCPU
    RESULT_VARIABLE ARM_MCPU_RESULT
)

# A zero exit status means the compiler accepted -mcpu=native.
if (NOT ARM_MCPU_RESULT)
    string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
endif ()

# Compare the exact value: padding inside the quotes would make this test
# always false and silently disable the fallback.
if ("${ARM_MCPU_FLAG}" STREQUAL "")
    set(ARM_MCPU_FLAG -mcpu=native)
    message(STATUS "ARM -mcpu not found, -mcpu=native will be used")
endif ()
108+
# Probe optional ARM extensions that -mcpu=native does not always enable.
#
# For each extension a tiny program using one of its intrinsics is compiled
# and run with "<ARM_MCPU_FLAG>+<ext>" as the only extra flag. Extensions that
# pass are collected in ARM_MCPU_FLAG_FIX, and the combined option
# "<ARM_MCPU_FLAG><suffixes>" is appended to ARCH_FLAGS as ONE list element.
# The "+ext" suffix must be glued to the CPU name without blanks, otherwise
# it is no longer part of the -mcpu option.
#
# CMAKE_REQUIRED_FLAGS is saved before the probes and restored afterwards.
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
include(CheckCXXSourceRuns)

# Suffixes of the extensions that passed; starts empty on every invocation.
set(ARM_MCPU_FLAG_FIX "")

# Dot-product extension (vdotq_s32).
set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+dotprod")
check_cxx_source_runs(
    "#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }"
    GGML_COMPILER_SUPPORT_DOTPROD)
if (GGML_COMPILER_SUPPORT_DOTPROD)
    string(APPEND ARM_MCPU_FLAG_FIX "+dotprod")
endif ()

# Int8 matrix-multiply extension (vmmlaq_s32).
set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+i8mm")
check_cxx_source_runs(
    "#include <arm_neon.h>\n int main() { int8x16_t _a, _b; int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }"
    GGML_COMPILER_SUPPORT_I8MM)
if (GGML_COMPILER_SUPPORT_I8MM)
    string(APPEND ARM_MCPU_FLAG_FIX "+i8mm")
endif ()

set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
117130
118131 else ()
119132 if (GGML_CPU_ARM_ARCH)
0 commit comments