Skip to content

Commit 065d6d0

Browse files
author
Mathieu Taillefumier
committed
Add alignment detection at configuration time
1 parent a24536d commit 065d6d0

File tree

2 files changed

+295
-3
lines changed

2 files changed

+295
-3
lines changed

CMakeLists.txt

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,21 +146,25 @@ set(ALIGN32 " ")
146146

147147
message("${TM_ENABLE_ALIGNMENT}")
148148
# Resolve the "auto" alignment mode.
#
# In auto mode the detection module is included; it publishes SIMD_LEVEL,
# SIMD_ARCH_FAMILY and SIMD_ALIGNMENT, which the selection chain below reads.
#
# The expansion is quoted so that an empty TM_ENABLE_ALIGNMENT compares as ""
# here instead of collapsing the condition into `if( STREQUAL "auto")`, and so
# that its value is never re-interpreted as the name of another variable.
if("${TM_ENABLE_ALIGNMENT}" STREQUAL "auto")
  # Anchor the path to this list file rather than relying on how a relative
  # include() path is resolved.
  include("${CMAKE_CURRENT_LIST_DIR}/cmake/DetectSimdAndAlignment.cmake")
  message(STATUS "SIMD: ${SIMD_LEVEL} (${SIMD_ARCH_FAMILY}), align=${SIMD_ALIGNMENT}")
else()
  # Explicit mode ("none", "16", "32", "64", ...): the chain below still expands
  # ${SIMD_ALIGNMENT} unquoted. Left undefined it would produce `( EQUAL 16)`,
  # which is a configure error; left at a value cached by an earlier "auto" run
  # it could match a branch ahead of the user's explicit choice. A normal
  # variable set to 0 shadows any cached value and matches none of 16/32/64.
  set(SIMD_ALIGNMENT 0)
endif()
152+
if (${TM_ENABLE_ALIGNMENT} STREQUAL "none")
149153
set(ALIGN_BASE "0x00")
150154
set(ALIGN " ")
151155
set(ALIGN_BASE32 "0x00")
152156
set(ALIGN32 " ")
153-
elseif(TM_ENABLE_ALIGNMENT EQUAL 16)
157+
elseif((${TM_ENABLE_ALIGNMENT} STREQUAL "16") OR (${SIMD_ALIGNMENT} EQUAL 16))
154158
set(ALIGN_BASE "0x0F")
155159
set(ALIGN "__attribute__ ((aligned (16)))")
156160
set(ALIGN_BASE32 "0x0F")
157161
set(ALIGN32 "__attribute__ ((aligned (16)))")
158-
elseif(TM_ENABLE_ALIGNMENT EQUAL 32)
162+
elseif((${TM_ENABLE_ALIGNMENT} STREQUAL "32") OR (${SIMD_ALIGNMENT} EQUAL 32))
159163
set(ALIGN_BASE "0x2F")
160164
set(ALIGN "__attribute__ ((aligned (32)))")
161165
set(ALIGN_BASE32 "0x2F")
162166
set(ALIGN32 "__attribute__ ((aligned (32)))")
163-
elseif(TM_ENABLE_ALIGNMENT EQUAL 64)
167+
elseif((${TM_ENABLE_ALIGNMENT} STREQUAL "64") OR (${SIMD_ALIGNMENT} EQUAL 64))
164168
set(ALIGN_BASE "0x3F")
165169
set(ALIGN "__attribute__ ((aligned (64)))")
166170
set(ALIGN_BASE32 "0x3F")

cmake/DetectSimdAndAlignment.cmake

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
# DetectSimdAndAlignment.cmake
2+
#
3+
# Detect SIMD architecture family, SIMD level and a reasonable alignment value.
4+
#
5+
# Exposed cache variables:
6+
# SIMD_ARCH_FAMILY : x86 / ARM / PPC / NVIDIA / UNKNOWN
7+
# SIMD_LEVEL : AVX512 / AVX2 / SSE2 / SVE / NEON_AARCH64 / NEON_ARMv8 / NEON_ARMv7 / POWER10 / VSX_POWER9 / VSX_POWER7 / ALTIVEC / CUDA_SM89 / SCALAR
8+
# SIMD_ALIGNMENT : integer, in bytes (16, 32, 64, ...)
9+
#
10+
# Optional (if you want a configured header):
11+
# SIMD_CONFIG_HEADER : path to a generated header (planned; this file does not generate or set it yet).
12+
#
13+
# Usage:
14+
# include(cmake/DetectSimdAndAlignment.cmake)
15+
# message(STATUS "SIMD: ${SIMD_ARCH_FAMILY} ${SIMD_LEVEL}, alignment=${SIMD_ALIGNMENT}")
16+
#
17+
# # Example: propagate as defines
18+
# target_compile_definitions(my_target PRIVATE
19+
# SIMD_ALIGNMENT=${SIMD_ALIGNMENT}
20+
# SIMD_LEVEL_${SIMD_LEVEL}
21+
# )
22+
# Families covered: x86 (SSE2 / AVX2 / AVX-512), ARM (NEON / SVE), PowerPC (AltiVec / VSX), NVIDIA.
23+
24+
25+
include_guard(GLOBAL) #
26+
27+
include(CheckCXXSourceCompiles)
28+
include(CheckCXXSourceRuns) # For runtime CPU detection fallback
29+
30+
# ------------------------------
31+
# 1. Detect architecture family
32+
# ------------------------------
33+
# Classify the target processor into a coarse family. The whole step is
# skipped when SIMD_ARCH_FAMILY is already defined, so a value supplied by the
# caller (or already present in the cache) is kept as-is.
if(NOT DEFINED SIMD_ARCH_FAMILY)
  # Compare case-insensitively: processor names differ in case across platforms.
  string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _simd_proc)

  # Start from the fallback and overwrite it with the first pattern that hits.
  # The patterns are unanchored, so they match anywhere in the processor name.
  set(_detected_arch "UNKNOWN")
  if(_simd_proc MATCHES "x86_64|amd64|i[3-6]86")
    set(_detected_arch "x86")
  elseif(_simd_proc MATCHES "armv[0-9]+|aarch64|arm64")
    set(_detected_arch "ARM")
  elseif(_simd_proc MATCHES "ppc64(le|el)?|powerpc|ppc")
    set(_detected_arch "PPC")
  elseif(_simd_proc MATCHES "nvcl|sm_89|sm_90")
    set(_detected_arch "NVIDIA")
  endif()

  # Publish the result as a cache entry so later code can read it.
  set(SIMD_ARCH_FAMILY "${_detected_arch}" CACHE STRING "SIMD architecture family")
endif()
50+
51+
# Baseline results, used when none of the probes below succeeds: scalar code
# with 16-byte alignment, float and double both marked as available.
# These are plain (non-FORCE) cache entries, so they only fill in entries that
# do not exist yet; the probes overwrite them with FORCE when they succeed.
set(SIMD_ALIGNMENT 16 CACHE STRING "Alignment in bytes")
set(SIMD_LEVEL "SCALAR" CACHE STRING "Detected SIMD level")
set(SIMD_HAS_DOUBLE ON CACHE BOOL "Double SIMD support")
set(SIMD_HAS_FLOAT ON CACHE BOOL "Float SIMD support")

# Snapshot of the caller's CMAKE_REQUIRED_FLAGS, taken before any probe
# appends its own options to it.
set(_SIMD_SAVED_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")

# _simd_restore_flags()
#   Puts CMAKE_REQUIRED_FLAGS back to the snapshot taken above. It is a macro,
#   not a function, so the assignment happens in the scope of the code that
#   invokes it. Takes no arguments.
macro(_simd_restore_flags)
  if(DEFINED _SIMD_SAVED_REQUIRED_FLAGS)
    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS}")
  endif()
endmacro()
64+
65+
# ------------------------------------------------
66+
# 2. x86: SSE2 → AVX2 → AVX512
67+
# ------------------------------------------------
68+
# Per-family probe chain.
#
# Each branch tries the widest instruction set first. A probe compiles a tiny
# program with the matching compiler option appended to the caller's
# CMAKE_REQUIRED_FLAGS. On the first success the branch records SIMD_LEVEL and
# SIMD_ALIGNMENT (and, where relevant, SIMD_HAS_FLOAT / SIMD_HAS_DOUBLE) in the
# cache with FORCE, restores CMAKE_REQUIRED_FLAGS and returns from this file.
# If every probe of a family fails, the defaults set earlier stay in effect.
#
# NOTE(review): check_cxx_source_compiles() only shows that the *compiler*
# accepts the option and the intrinsic. It does not show that the CPU the
# binaries will run on implements the instruction set, nor that the project
# builds with these options. Confirm this is the intended meaning of
# SIMD_LEVEL.
if(SIMD_ARCH_FAMILY STREQUAL "x86")
  # --- AVX-512 (64-byte vectors) ---
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mavx512f -mavx512dq")
  check_cxx_source_compiles("
    #include <immintrin.h>
    int main() { __m512d v = _mm512_set1_pd(1.0); (void)v; return 0; }
  " _HAVE_AVX512_DOUBLE)

  if(_HAVE_AVX512_DOUBLE)
    set(SIMD_LEVEL "AVX512" CACHE STRING "" FORCE)
    set(SIMD_ALIGNMENT 64 CACHE STRING "" FORCE)
    _simd_restore_flags()
    return()
  endif()

  # --- AVX2 (32-byte vectors) ---
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mavx2")
  check_cxx_source_compiles("
    #include <immintrin.h>
    int main() { __m256d v = _mm256_set1_pd(1.0); (void)v; return 0; }
  " _HAVE_AVX2_DOUBLE)

  if(_HAVE_AVX2_DOUBLE)
    set(SIMD_LEVEL "AVX2" CACHE STRING "" FORCE)
    set(SIMD_ALIGNMENT 32 CACHE STRING "" FORCE)
    _simd_restore_flags()
    return()
  endif()

  # --- SSE2 (16-byte vectors), the minimum level with double support ---
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -msse2")
  check_cxx_source_compiles("
    #include <emmintrin.h>
    int main() { __m128d v = _mm_set1_pd(1.0); (void)v; return 0; }
  " _HAVE_SSE2_DOUBLE)

  if(_HAVE_SSE2_DOUBLE)
    set(SIMD_LEVEL "SSE2" CACHE STRING "" FORCE)
    set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
    _simd_restore_flags()
    return()
  endif()

# --------------------------------------
# 3. ARM: SVE / NEON
# --------------------------------------
elseif(SIMD_ARCH_FAMILY STREQUAL "ARM")
  string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _arm_proc)

  if(_arm_proc MATCHES "aarch64|arm64")
    # --- AArch64 with SVE ---
    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -march=armv8-a+sve")
    check_cxx_source_compiles("
      #include <arm_sve.h>
      int main() { svfloat32_t v = svdup_f32(1.0f); (void)v; return 0; }
    " _HAVE_SVE)

    if(_HAVE_SVE)
      set(SIMD_LEVEL "SVE" CACHE STRING "" FORCE)
      set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
      _simd_restore_flags()
      return()
    endif()

    # --- AArch64 NEON (float and double) ---
    # Drop the SVE option before probing NEON. A compiler that rejects
    # "+sve" would otherwise fail this probe as well, and an AArch64 target
    # would wrongly end up at the SCALAR default.
    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS}")
    check_cxx_source_compiles("
      #include <arm_neon.h>
      int main() {
        float64x2_t vd = vdupq_n_f64(1.0);
        float32x4_t vf = vdupq_n_f32(1.0f);
        (void)vd; (void)vf; return 0;
      }" _HAVE_NEON_AARCH64)

    if(_HAVE_NEON_AARCH64)
      set(SIMD_LEVEL "NEON_AARCH64" CACHE STRING "" FORCE)
      set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
      _simd_restore_flags()
      return()
    endif()

  elseif(_arm_proc MATCHES "armv8")
    # --- ARMv8 in 32-bit mode: only the float vector type is probed ---
    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -march=armv8-a+simd")
    check_cxx_source_compiles("
      #include <arm_neon.h>
      int main() { float32x4_t v = vdupq_n_f32(1.0f); (void)v; return 0; }
    " _HAVE_ARMv8_NEON)

    if(_HAVE_ARMv8_NEON)
      set(SIMD_LEVEL "NEON_ARMv8" CACHE STRING "" FORCE)
      set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
      set(SIMD_HAS_DOUBLE OFF CACHE BOOL "" FORCE)
      _simd_restore_flags()
      return()
    endif()

  elseif(_arm_proc MATCHES "armv7")
    # --- ARMv7 NEON: only the float vector type is probed ---
    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mfpu=neon -march=armv7-a")
    check_cxx_source_compiles("
      #include <arm_neon.h>
      int main() { float32x4_t v = vdupq_n_f32(1.0f); (void)v; return 0; }
    " _HAVE_ARMv7_NEON)

    if(_HAVE_ARMv7_NEON)
      set(SIMD_LEVEL "NEON_ARMv7" CACHE STRING "" FORCE)
      set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
      set(SIMD_HAS_DOUBLE OFF CACHE BOOL "" FORCE)
      _simd_restore_flags()
      return()
    endif()
  endif()

# --------------------------------------
# 4. PowerPC: VSX / AltiVec
# --------------------------------------
elseif(SIMD_ARCH_FAMILY STREQUAL "PPC")

  string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _ppc_proc)

  # The AltiVec/VSX `vector double` and `vector float` types are 128 bits wide
  # (2 doubles / 4 floats) on every POWER generation. The probes therefore use
  # 2- and 4-element initialisers and report 16-byte alignment. The previous
  # POWER10/POWER9 probes used 8/16/4/7-element initialisers, which cannot
  # compile for these types, so those two levels were never selected.

  # --- POWER10: compiler accepts -mcpu=power10 ---
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mcpu=power10")
  check_cxx_source_compiles("
    #include <altivec.h>
    int main() {
      vector double vd = {1.0,1.0};
      vector float vf = {1.0f,1.0f,1.0f,1.0f};
      (void)vd; (void)vf; return 0;
    }" _HAVE_POWER10)

  if(_HAVE_POWER10)
    set(SIMD_LEVEL "POWER10" CACHE STRING "" FORCE)
    set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE) # 128-bit vectors = 16 bytes
    set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE)
    set(SIMD_HAS_DOUBLE ON CACHE BOOL "" FORCE)
    _simd_restore_flags()
    return()
  endif()

  # --- POWER9 VSX ---
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mcpu=power9 -mvsx")
  check_cxx_source_compiles("
    #include <altivec.h>
    int main() {
      vector double vd = {1.0,1.0};
      vector float vf = {1.0f,1.0f,1.0f,1.0f};
      (void)vd; (void)vf; return 0;
    }" _HAVE_VSX_POWER9)

  if(_HAVE_VSX_POWER9)
    set(SIMD_LEVEL "VSX_POWER9" CACHE STRING "" FORCE)
    set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE) # 128-bit vectors = 16 bytes
    set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE)
    set(SIMD_HAS_DOUBLE ON CACHE BOOL "" FORCE)
    _simd_restore_flags()
    return()
  endif()

  # --- POWER7 VSX (first level with a double vector type) ---
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mcpu=power7 -mvsx")
  check_cxx_source_compiles("
    #include <altivec.h>
    int main() {
      vector double vd = {1.0,1.0};
      (void)vd; return 0;
    }" _HAVE_VSX_POWER7)

  if(_HAVE_VSX_POWER7)
    set(SIMD_LEVEL "VSX_POWER7" CACHE STRING "" FORCE)
    set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
    set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE)
    set(SIMD_HAS_DOUBLE ON CACHE BOOL "" FORCE)
    _simd_restore_flags()
    return()
  endif()

  # --- Classic AltiVec/VMX: only the float vector type is probed ---
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -maltivec -mabi=altivec")
  check_cxx_source_compiles("
    #include <altivec.h>
    int main() {
      vector float vf = (vector float){1.0f,1.0f,1.0f,1.0f};
      (void)vf; return 0;
    }" _HAVE_ALTIVEC)

  if(_HAVE_ALTIVEC)
    set(SIMD_LEVEL "ALTIVEC" CACHE STRING "" FORCE)
    set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
    set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE)
    set(SIMD_HAS_DOUBLE OFF CACHE BOOL "" FORCE)
    _simd_restore_flags()
    return()
  endif()

# --------------------------------------
# 5. NVIDIA (sm_89 probe)
# --------------------------------------
# NOTE(review): this probe hands a GPU architecture option to the C++ compiler
# and includes <cuda_runtime.h>; it presumably only succeeds when the C++
# compiler is a CUDA-capable driver - confirm. The original section title
# mentioned GH200, which is normally listed as sm_90 rather than sm_89 - TODO
# confirm which architecture is intended.
elseif(SIMD_ARCH_FAMILY STREQUAL "NVIDIA")
  set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} --gpu-arch=sm_89")
  check_cxx_source_compiles("
    #include <cuda_runtime.h>
    int main() { double d = 1.0; (void)d; return 0; }
  " _HAVE_CUDA_SM89)

  if(_HAVE_CUDA_SM89)
    set(SIMD_LEVEL "CUDA_SM89" CACHE STRING "" FORCE)
    set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
    _simd_restore_flags()
    return()
  endif()

# --------------------------------------
# 6. Fallback: unknown family, keep the defaults
# --------------------------------------
else()
  _simd_restore_flags()
  return()
endif()

# Reached when a known family was selected but none of its probes succeeded:
# undo the last probe's options so they do not leak into the caller.
_simd_restore_flags()

0 commit comments

Comments
 (0)