Skip to content

Commit e6197d6

Browse files
committed
Add explicit -mpopcnt to avx2 and avx512 builds.
Zig adds native support for the CPU being built on, so if that CPU supports popcnt then we don't need an explicit -mpopcnt. However, adding any -m option appears to disable that native support, so -mavx2 doesn't enable -mpopcnt, yet our auto-detection previously claimed the flag was unnecessary. This means that building on a CPU with popcnt but no avx2 will fail (with zig) when targeting the avx2 platform. Therefore the only reliable check is to test the combinations actively in use: if we want to compile code using popcnt + avx2 then we have to test -mpopcnt -mavx2 together. Testing the flags individually is problematic and tricky to manage in autoconf. Fixes #109
1 parent cd1f950 commit e6197d6

File tree

3 files changed

+20
-53
lines changed

3 files changed

+20
-53
lines changed

configure.ac

Lines changed: 18 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -129,68 +129,33 @@ dnl AC_CHECK_LIB([bsc], [bsc_compress], [
129129
dnl LIBS="-lbsc $LIBS"
130130
dnl AC_DEFINE([HAVE_LIBBSC],1,[Define to 1 if you have the libbsc library.])])
131131

132-
dnl Count parts needed to build rANS_static32x16pr_sse4.c
133-
sse4_prerequisites=""
134-
135-
dnl Check if we can use our SSSE3 implementations of rANS 32x16 codec.
136-
HTS_CHECK_COMPILE_FLAGS_NEEDED([ssse3], [-mssse3], [AC_LANG_PROGRAM([[
137-
#ifdef __x86_64__
138-
#include "x86intrin.h"
139-
#endif
140-
]],[[
141-
#ifdef __x86_64__
142-
__m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1);
143-
__m128i c = _mm_shuffle_epi8(a, b);
144-
return *((char *) &c);
145-
#endif
146-
]])], [
147-
MSSSE3="$flags_needed"
148-
sse4_prerequisites="o$sse4_prerequisites"
149-
AC_SUBST([MSSSE3])
150-
AC_DEFINE([HAVE_SSSE3],1,[Defined to 1 if rANS source using SSSE3 can be compiled.])
151-
])
152-
153-
dnl Check if we can use popcnt instructions
154-
HTS_CHECK_COMPILE_FLAGS_NEEDED([popcnt], [-mpopcnt], [AC_LANG_PROGRAM([[
155-
#ifdef __x86_64__
156-
#include "x86intrin.h"
157-
#endif
158-
]],[[
159-
#ifdef __x86_64__
160-
unsigned int i = _mm_popcnt_u32(1);
161-
return i != 1;
162-
#endif
163-
]])], [
164-
MPOPCNT="$flags_needed"
165-
sse4_prerequisites="o$sse4_prerequisites"
166-
AC_SUBST([MPOPCNT])
167-
AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.])
168-
])
169-
170-
dnl Check if we can use our SSE4.1 too. This *may* always imply SSSE3?
171-
dnl It may be easier just to target an old era of cpu than -mssse3 -msse4.1
172-
dnl -mpopcnt. Eg -march=nehalem. I don't know how wide spread that is.
173-
HTS_CHECK_COMPILE_FLAGS_NEEDED([sse4.1], [-msse4.1], [AC_LANG_PROGRAM([[
132+
dnl Check if we can use our SSE4.1 too.
133+
dnl Our SSE4 codec uses SSE4.1, SSSE3 (shuffle) and POPCNT, so we check all 3
134+
dnl together. This helps Zig builds which don't work well if we test each
135+
dnl individually.
136+
HTS_CHECK_COMPILE_FLAGS_NEEDED([sse4.1], [-msse4.1 -mssse3 -mpopcnt], [AC_LANG_PROGRAM([[
174137
#ifdef __x86_64__
175138
#include "x86intrin.h"
176139
#endif
177140
]],[[
178141
#ifdef __x86_64__
179142
__m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1);
180-
__m128i c = _mm_max_epu32(a, b);
181-
return *((char *) &c);
143+
__m128i c = _mm_shuffle_epi8(_mm_max_epu32(a, b), b);
144+
return _mm_popcnt_u32(*((char *) &c));
182145
#endif
183146
]])], [
184147
MSSE4_1="$flags_needed"
185-
sse4_prerequisites="o$sse4_prerequisites"
148+
build_rans_sse4=yes
186149
AC_SUBST([MSSE4_1])
187150
AC_DEFINE([HAVE_SSE4_1],1,[Defined to 1 if rANS source using SSE4.1 can be compiled.])
151+
AC_DEFINE([HAVE_SSSE3],1,[Defined to 1 if rANS source using SSSE3 can be compiled.])
152+
AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.])
188153
])
189-
AM_CONDITIONAL([RANS_32x16_SSE4],[test "x$sse4_prerequisites" = "xooo"])
154+
AM_CONDITIONAL([RANS_32x16_SSE4],[test "$build_rans_sse4" = yes])
190155

191156
dnl Check if we can use our AVX2 implementations.
192157
build_rans_avx2=no
193-
HTS_CHECK_COMPILE_FLAGS_NEEDED([avx2], [-mavx2], [AC_LANG_PROGRAM([[
158+
HTS_CHECK_COMPILE_FLAGS_NEEDED([avx2], [-mavx2 -mpopcnt], [AC_LANG_PROGRAM([[
194159
#ifdef __x86_64__
195160
#include "x86intrin.h"
196161
#endif
@@ -199,34 +164,36 @@ HTS_CHECK_COMPILE_FLAGS_NEEDED([avx2], [-mavx2], [AC_LANG_PROGRAM([[
199164
__m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
200165
__m256i b = _mm256_add_epi32(a, a);
201166
long long c = _mm256_extract_epi64(b, 0);
202-
return (int) c;
167+
return _mm_popcnt_u32((int)c);
203168
#endif
204169
]])], [
205170
MAVX2="$flags_needed"
206171
build_rans_avx2=yes
207172
AC_SUBST([MAVX2])
208173
AC_DEFINE([HAVE_AVX2],1,[Defined to 1 if rANS source using AVX2 can be compiled.])
174+
AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.])
209175
])
210176
AM_CONDITIONAL([RANS_32x16_AVX2],[test "$build_rans_avx2" = yes])
211177

212-
dnl Check also if we have AVX512. If so this overrides AVX2
178+
dnl Check also if we have AVX512.
213179
build_rans_avx512=no
214-
HTS_CHECK_COMPILE_FLAGS_NEEDED([avx512f], [-mavx512f], [AC_LANG_PROGRAM([[
180+
HTS_CHECK_COMPILE_FLAGS_NEEDED([avx512f], [-mavx512f -mpopcnt], [AC_LANG_PROGRAM([[
215181
#ifdef __x86_64__
216182
#include "x86intrin.h"
217183
#endif
218184
]],[[
219185
#ifdef __x86_64__
220186
__m512i a = _mm512_set1_epi32(1);
221187
__m512i b = _mm512_add_epi32(a, a);
222-
return *((char *) &b);
188+
return _mm_popcnt_u32(*((char *) &b));
223189
#endif
224190
]])], [
225191
MAVX512="$flags_needed"
226192
build_rans_avx512=yes
227193
AC_SUBST([MAVX512])
228194
AC_DEFINE([HAVE_AVX512],1,[Defined to 1 if rANS source using AVX512F can be compiled.])
195+
dnl Define HAVE_POPCNT only inside the success action, i.e. when the combined
dnl -mavx512f -mpopcnt probe compiled; outside it the define is unconditional.
AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.])
229196
])
230197
AM_CONDITIONAL([RANS_32x16_AVX512],[test "$build_rans_avx512" = yes])
231198

232199
AC_SUBST([HTSCODECS_SIMD_SRC])

htscodecs/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ noinst_LTLIBRARIES =
7272
if RANS_32x16_SSE4
7373
noinst_LTLIBRARIES += librANS_static32x16pr_sse4.la
7474
librANS_static32x16pr_sse4_la_SOURCES = rANS_static32x16pr_sse4.c
75-
librANS_static32x16pr_sse4_la_CFLAGS = @MSSE4_1@ @MSSSE3@ @MPOPCNT@
75+
librANS_static32x16pr_sse4_la_CFLAGS = @MSSE4_1@
7676
libhtscodecs_la_LIBADD += librANS_static32x16pr_sse4.la
7777
endif
7878
if RANS_32x16_AVX2

m4/hts_check_compile_flags_needed.m4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ AC_CACHE_CHECK([_AC_LANG compiler flags needed for $1], CACHEVAR, [
5050
[ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
5151
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $6 $2"
5252
AC_LINK_IFELSE([m4_default([$3],[AC_LANG_PROGRAM()])],
53-
[AS_VAR_SET(CACHEVAR,[$2])],
53+
[AS_VAR_SET(CACHEVAR,["$2"])],
5454
[AS_VAR_SET(CACHEVAR,[unsupported])])
5555
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])])
5656
AS_VAR_IF(CACHEVAR,unsupported, [

0 commit comments

Comments
 (0)