|
| 1 | +# DetectSimdAndAlignment.cmake |
| 2 | +# |
| 3 | +# Detect SIMD architecture family, SIMD level and a reasonable alignment value. |
| 4 | +# |
| 5 | +# Exposed cache variables: |
| 6 | +# SIMD_ARCH_FAMILY : x86 / ARM / PPC / NVIDIA / UNKNOWN |
| 7 | +# SIMD_LEVEL : AVX512 / AVX2 / SSE2 / SVE / NEON_AARCH64 / NEON_ARMv8 / NEON_ARMv7 / POWER10 / VSX_POWER9 / VSX_POWER7 / ALTIVEC / CUDA_SM89 / SCALAR |
| 8 | +# SIMD_ALIGNMENT : integer, in bytes (16, 32, 64, ...) |
| 9 | +# |
| 10 | +# Optional (if you want a configured header): |
| 11 | +# SIMD_CONFIG_HEADER : path to the generated header (see bottom). |
| 12 | +# |
| 13 | +# Usage: |
| 14 | +# include(cmake/DetectSimdAndAlignment.cmake) |
| 15 | +# message(STATUS "SIMD: ${SIMD_ARCH_FAMILY} ${SIMD_LEVEL}, alignment=${SIMD_ALIGNMENT}") |
| 16 | +# |
| 17 | +# # Example: propagate as defines |
| 18 | +# target_compile_definitions(my_target PRIVATE |
| 19 | +# SIMD_ALIGNMENT=${SIMD_ALIGNMENT} |
| 20 | +# SIMD_LEVEL_${SIMD_LEVEL} |
| 21 | +# ) |
| 22 | +# DetectSimdAndAlignment.cmake - COMPLETE: x86 + ARM NEON + NVIDIA + PowerPC |
| 23 | + |
| 24 | + |
| 25 | +include_guard(GLOBAL) # |
| 26 | + |
| 27 | +include(CheckCXXSourceCompiles) |
| 28 | +include(CheckCXXSourceRuns) # For runtime CPU detection fallback |
| 29 | + |
# ------------------------------
# 1. Detect architecture family
# ------------------------------

# _simd_classify_processor(<processor> <out_var>)
#
# Maps a processor name (normally CMAKE_SYSTEM_PROCESSOR) onto one of the
# family tags used by this module: x86, ARM, PPC, NVIDIA or UNKNOWN.
# Matching is case-insensitive. The tag is written to <out_var> in the
# caller's scope. An empty or unrecognised name yields UNKNOWN.
function(_simd_classify_processor processor out_var)
  string(TOLOWER "${processor}" _proc)

  # "x86" on its own is the name reported for 32-bit Windows; because the
  # match is unanchored it also covers "x86_64".
  if(_proc MATCHES "x86|amd64|i[3-6]86")
    set(_family "x86")
  # "^arm" additionally accepts names without a version digit
  # (e.g. "arm", "armhf", "armel"); "arm64" is covered by it as well.
  elseif(_proc MATCHES "^arm|armv[0-9]+|aarch64")
    set(_family "ARM")
  elseif(_proc MATCHES "ppc64(le|el)?|powerpc|ppc")
    set(_family "PPC")
  elseif(_proc MATCHES "nvcl|sm_89|sm_90")
    set(_family "NVIDIA")
  else()
    set(_family "UNKNOWN")
  endif()

  set(${out_var} "${_family}" PARENT_SCOPE)
endfunction()

# A value already supplied for SIMD_ARCH_FAMILY (e.g. with -D on the command
# line) is kept; detection only runs when the variable is not defined.
if(NOT DEFINED SIMD_ARCH_FAMILY)
  _simd_classify_processor("${CMAKE_SYSTEM_PROCESSOR}" _detected_arch)
  set(SIMD_ARCH_FAMILY "${_detected_arch}" CACHE STRING "SIMD architecture family")
endif()
| 50 | + |
# Baseline results, reported when none of the probes below succeeds.
# These are plain (non-FORCE) cache sets, so they only create entries that
# are not in the cache yet; existing entries are left alone.
set(SIMD_HAS_DOUBLE ON       CACHE BOOL   "Double SIMD support")
set(SIMD_HAS_FLOAT  ON       CACHE BOOL   "Float SIMD support")
set(SIMD_ALIGNMENT  16       CACHE STRING "Alignment in bytes")
set(SIMD_LEVEL      "SCALAR" CACHE STRING "Detected SIMD level")
| 56 | + |
# Snapshot of the caller's CMAKE_REQUIRED_FLAGS. Every probe builds its own
# flag string on top of this value, and the value is put back afterwards.
set(_SIMD_SAVED_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")

# _simd_restore_flags()
#
# Resets CMAKE_REQUIRED_FLAGS to the snapshot taken above. Written as a macro
# so that the assignment lands in the scope of whoever invokes it.
macro(_simd_restore_flags)
  if(DEFINED _SIMD_SAVED_REQUIRED_FLAGS)
    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS}")
  endif()
endmacro()
| 64 | + |
# ------------------------------------------------
# 2. x86: widest tier first (AVX512 → AVX2 → SSE2)
# ------------------------------------------------
# Each tier is a compile-only probe: check_cxx_source_compiles() builds the
# snippet with the tier's flags appended to the saved CMAKE_REQUIRED_FLAGS.
# A pass shows that the compiler accepts the flags and the intrinsic; the
# snippet is never executed. The first tier that passes is recorded in the
# cache, the required flags are restored, and processing of this file stops.
if(SIMD_ARCH_FAMILY STREQUAL "x86")

  # --- Tier 1: AVX-512 (F + DQ), 64-byte vectors of double ---
  set(_simd_x86_flags "-mavx512f -mavx512dq")
  set(_simd_x86_probe "
    #include <immintrin.h>
    int main() { __m512d v = _mm512_set1_pd(1.0); (void)v; return 0; }
  ")
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} ${_simd_x86_flags}")
  check_cxx_source_compiles("${_simd_x86_probe}" _HAVE_AVX512_DOUBLE)

  if(_HAVE_AVX512_DOUBLE)
    _simd_restore_flags()
    set(SIMD_ALIGNMENT 64 CACHE STRING "" FORCE)
    set(SIMD_LEVEL "AVX512" CACHE STRING "" FORCE)
    return()
  endif()

  # --- Tier 2: AVX2 flags, 32-byte vectors of double ---
  set(_simd_x86_flags "-mavx2")
  set(_simd_x86_probe "
    #include <immintrin.h>
    int main() { __m256d v = _mm256_set1_pd(1.0); (void)v; return 0; }
  ")
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} ${_simd_x86_flags}")
  check_cxx_source_compiles("${_simd_x86_probe}" _HAVE_AVX2_DOUBLE)

  if(_HAVE_AVX2_DOUBLE)
    _simd_restore_flags()
    set(SIMD_ALIGNMENT 32 CACHE STRING "" FORCE)
    set(SIMD_LEVEL "AVX2" CACHE STRING "" FORCE)
    return()
  endif()

  # --- Tier 3: SSE2, 16-byte vectors of double (the minimum probed) ---
  set(_simd_x86_flags "-msse2")
  set(_simd_x86_probe "
    #include <emmintrin.h>
    int main() { __m128d v = _mm_set1_pd(1.0); (void)v; return 0; }
  ")
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} ${_simd_x86_flags}")
  check_cxx_source_compiles("${_simd_x86_probe}" _HAVE_SSE2_DOUBLE)

  if(_HAVE_SSE2_DOUBLE)
    _simd_restore_flags()
    set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
    set(SIMD_LEVEL "SSE2" CACHE STRING "" FORCE)
    return()
  endif()
| 110 | + |
# --------------------------------------
# 3. ARM: SVE / NEON, by sub-architecture
# --------------------------------------
# The processor name selects which probes run:
#   aarch64 / arm64 : SVE first, then AArch64 NEON (float and double)
#   armv8 (32-bit)  : NEON, float only
#   armv7           : NEON, float only
# All probes are compile-only. The first one that passes is written to the
# cache, CMAKE_REQUIRED_FLAGS is restored, and processing of this file stops.
elseif(SIMD_ARCH_FAMILY STREQUAL "ARM")
  string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _arm_proc)

  if(_arm_proc MATCHES "aarch64|arm64")
    # --- AArch64: SVE ---
    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -march=armv8-a+sve")
    check_cxx_source_compiles("
      #include <arm_sve.h>
      int main() { svfloat32_t v = svdup_f32(1.0f); (void)v; return 0; }
    " _HAVE_SVE)

    if(_HAVE_SVE)
      set(SIMD_LEVEL "SVE" CACHE STRING "" FORCE)
      set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
      _simd_restore_flags()
      return()
    endif()

    # --- AArch64: NEON (float and double) ---
    # Drop the SVE -march flag before probing NEON. Left in place, a compiler
    # that rejects "+sve" would fail this probe for the same reason it failed
    # the previous one, and NEON would never be detected.
    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS}")
    check_cxx_source_compiles("
      #include <arm_neon.h>
      int main() {
        float64x2_t vd = vdupq_n_f64(1.0);
        float32x4_t vf = vdupq_n_f32(1.0f);
        (void)vd; (void)vf; return 0;
      }" _HAVE_NEON_AARCH64)

    if(_HAVE_NEON_AARCH64)
      set(SIMD_LEVEL "NEON_AARCH64" CACHE STRING "" FORCE)
      set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
      _simd_restore_flags()
      return()
    endif()

  elseif(_arm_proc MATCHES "armv8")
    # --- ARMv8, 32-bit: NEON, float only ---
    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -march=armv8-a+simd")
    check_cxx_source_compiles("
      #include <arm_neon.h>
      int main() { float32x4_t v = vdupq_n_f32(1.0f); (void)v; return 0; }
    " _HAVE_ARMv8_NEON)

    if(_HAVE_ARMv8_NEON)
      set(SIMD_LEVEL "NEON_ARMv8" CACHE STRING "" FORCE)
      set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
      # Only the float32 vector type is probed here, so double is reported off.
      set(SIMD_HAS_DOUBLE OFF CACHE BOOL "" FORCE)
      _simd_restore_flags()
      return()
    endif()

  elseif(_arm_proc MATCHES "armv7")
    # --- ARMv7: NEON, float only ---
    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mfpu=neon -march=armv7-a")
    check_cxx_source_compiles("
      #include <arm_neon.h>
      int main() { float32x4_t v = vdupq_n_f32(1.0f); (void)v; return 0; }
    " _HAVE_ARMv7_NEON)

    if(_HAVE_ARMv7_NEON)
      set(SIMD_LEVEL "NEON_ARMv7" CACHE STRING "" FORCE)
      set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
      # Only the float32 vector type is probed here, so double is reported off.
      set(SIMD_HAS_DOUBLE OFF CACHE BOOL "" FORCE)
      _simd_restore_flags()
      return()
    endif()
  endif()
| 180 | + |
# --------------------------------------
# 4. PowerPC: POWER10 → POWER9 → POWER7 VSX → classic AltiVec
# --------------------------------------
# AltiVec/VMX and VSX vector types are 128 bits wide on every tier probed
# here: `vector double` holds 2 lanes and `vector float` holds 4. Each probe
# therefore uses initialisers of exactly that size (a longer initialiser
# list is ill-formed C++ and would make the probe fail unconditionally),
# and every tier reports a 16-byte alignment.
#
# The tiers differ only in the -mcpu / -m flags passed to the compiler. All
# probes are compile-only: a pass shows that the compiler accepts the flags,
# not that the snippet was executed. The first tier that passes is written
# to the cache, CMAKE_REQUIRED_FLAGS is restored, and processing of this file
# stops.
elseif(SIMD_ARCH_FAMILY STREQUAL "PPC")

  # === POWER10 (VSX, float + double) ===
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mcpu=power10")
  check_cxx_source_compiles("
    #include <altivec.h>
    int main() {
      vector double vd = {1.0, 1.0};
      vector float vf = {1.0f, 1.0f, 1.0f, 1.0f};
      (void)vd; (void)vf; return 0;
    }" _HAVE_POWER10)

  if(_HAVE_POWER10)
    set(SIMD_LEVEL "POWER10" CACHE STRING "" FORCE)
    set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE) # 128-bit vectors = 16 bytes
    set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE)
    set(SIMD_HAS_DOUBLE ON CACHE BOOL "" FORCE)
    _simd_restore_flags()
    return()
  endif()

  # === POWER9 (VSX, float + double) ===
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mcpu=power9 -mvsx")
  check_cxx_source_compiles("
    #include <altivec.h>
    int main() {
      vector double vd = {1.0, 1.0};
      vector float vf = {1.0f, 1.0f, 1.0f, 1.0f};
      (void)vd; (void)vf; return 0;
    }" _HAVE_VSX_POWER9)

  if(_HAVE_VSX_POWER9)
    set(SIMD_LEVEL "VSX_POWER9" CACHE STRING "" FORCE)
    set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE) # 128-bit vectors = 16 bytes
    set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE)
    set(SIMD_HAS_DOUBLE ON CACHE BOOL "" FORCE)
    _simd_restore_flags()
    return()
  endif()

  # === POWER7 (first VSX tier probed; adds vector double) ===
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mcpu=power7 -mvsx")
  check_cxx_source_compiles("
    #include <altivec.h>
    int main() {
      vector double vd = {1.0, 1.0};
      (void)vd; return 0;
    }" _HAVE_VSX_POWER7)

  if(_HAVE_VSX_POWER7)
    set(SIMD_LEVEL "VSX_POWER7" CACHE STRING "" FORCE)
    set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
    set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE)
    set(SIMD_HAS_DOUBLE ON CACHE BOOL "" FORCE)
    _simd_restore_flags()
    return()
  endif()

  # === Classic AltiVec/VMX (float only) ===
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -maltivec -mabi=altivec")
  check_cxx_source_compiles("
    #include <altivec.h>
    int main() {
      vector float vf = (vector float){1.0f,1.0f,1.0f,1.0f};
      (void)vf; return 0;
    }" _HAVE_ALTIVEC)

  if(_HAVE_ALTIVEC)
    set(SIMD_LEVEL "ALTIVEC" CACHE STRING "" FORCE)
    set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
    set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE)
    # Only `vector float` is probed for this tier, so double is reported off.
    set(SIMD_HAS_DOUBLE OFF CACHE BOOL "" FORCE)
    _simd_restore_flags()
    return()
  endif()
| 262 | + |
# --------------------------------------
# 5. NVIDIA: CUDA probe for sm_89
# --------------------------------------
# Compiles a host-only snippet that includes <cuda_runtime.h> with
# "--gpu-arch=sm_89" appended to the saved required flags. On success the
# level is recorded as CUDA_SM89 with a 16-byte alignment.
#
# NOTE(review): the previous heading named "GH200" next to sm_89. NVIDIA lists
# GH200 (Hopper) as compute capability 9.0, i.e. sm_90 — confirm which target
# is intended.
# NOTE(review): the probe goes through the C++ compiler via
# check_cxx_source_compiles(); verify that this compiler understands
# "--gpu-arch=" and can find the CUDA headers, otherwise this tier can never
# be selected.
elseif(SIMD_ARCH_FAMILY STREQUAL "NVIDIA")
  set(_simd_cuda_probe "
    #include <cuda_runtime.h>
    int main() { double d = 1.0; (void)d; return 0; }
  ")
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} --gpu-arch=sm_89")
  check_cxx_source_compiles("${_simd_cuda_probe}" _HAVE_CUDA_SM89)

  if(_HAVE_CUDA_SM89)
    _simd_restore_flags()
    set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
    set(SIMD_LEVEL "CUDA_SM89" CACHE STRING "" FORCE)
    return()
  endif()

# --------------------------------------
# 6. Fallback: any other family
# --------------------------------------
# No probe is run; the cached SIMD_LEVEL / SIMD_ALIGNMENT values are left
# as they are.
else()
  _simd_restore_flags()
  return()
endif()

# Reached when a known family was selected but none of its probes passed:
# put the caller's CMAKE_REQUIRED_FLAGS back before leaving the file.
_simd_restore_flags()
0 commit comments