Skip to content

Commit 7e7bddb

Browse files
committed
Autoconf improvements
Added a configure-time check for the __builtin_prefetch compiler builtin (which is present on basically every compiler I've tried), and used it in place of the SSE checks. Also removed the unneeded ax_with_libdeflate.m4, which isn't used anywhere.
1 parent 36e2470 commit 7e7bddb

File tree

3 files changed

+23
-111
lines changed

3 files changed

+23
-111
lines changed

configure.ac

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,17 @@ unable to be decompressed and arith tests will fail.])
109109
fi
110110
fi
111111

112+
dnl Check if __builtin_prefetch exists.
113+
AC_CACHE_CHECK([for __builtin_prefetch], [ax_cv_builtin_prefetch],[
114+
AC_LINK_IFELSE([AC_LANG_PROGRAM([], [__builtin_prefetch("")])],
115+
[ax_cv_builtin_prefetch=yes],
116+
[ax_cv_builtin_prefetch=no])
117+
])
118+
AS_IF([test "$ax_cv_builtin_prefetch" = "yes"],
119+
[AC_DEFINE(HAVE_BUILTIN_PREFETCH, 1,
120+
[Define to 1 if you have a __builtin_prefetch])],
121+
[])
122+
112123
dnl AC_CHECK_LIB([lzma], [lzma_easy_buffer_encode], [
113124
dnl LIBS="-llzma $LIBS"
114125
dnl AC_DEFINE([HAVE_LIBLZMA],1,[Define to 1 if you have the liblzma library.])])
@@ -220,8 +231,6 @@ AM_CONDITIONAL([RANS_32x16_AVX512],[test "$build_rans_avx512" = yes])
220231

221232
AC_SUBST([HTSCODECS_SIMD_SRC])
222233

223-
AX_LIBDEFLATE
224-
225234
dnl Checks for header files.
226235
AC_HEADER_SYS_WAIT
227236
AC_CHECK_HEADERS(fcntl.h limits.h unistd.h malloc.h)

htscodecs/fqzcomp_qual.c

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -202,15 +202,14 @@ static int strat_opts[][12] = {
202202
};
203203
static int nstrats = sizeof(strat_opts) / sizeof(*strat_opts);
204204

205-
#ifdef __SSE__
206-
# include <xmmintrin.h>
207-
# define _mm_prefetch2 _mm_prefetch
205+
#ifdef HAVE_BUILTIN_PREFETCH
206+
static inline void mm_prefetch(void *x) {
207+
__builtin_prefetch(x);
208+
}
208209
#else
209-
#ifndef _MM_HINT_T0
210-
#define _MM_HINT_T0 0
211-
#endif
212-
static inline int _mm_prefetch2(void *x, int y) {
213-
return *(volatile int *)x;
210+
static inline void mm_prefetch(void *x) {
211+
// Fetch and discard is quite close to a genuine prefetch
212+
*(volatile int *)x;
214213
}
215214
#endif
216215

@@ -1119,19 +1118,19 @@ unsigned char *compress_block_fqz2f(int vers,
11191118
// Model has symbols sorted by frequency, so most common are at
11201119
// start. So while model is approx 1Kb, the first cache line is
11211120
// a big win.
1122-
_mm_prefetch2(&model.qual[l1], _MM_HINT_T0);
1121+
mm_prefetch(&model.qual[l1]);
11231122
unsigned char qm1 = pm->qmap[in[i + ++j]];
11241123
last = fqz_update_ctx(pm, &state, qm1); l2 = last;
11251124

1126-
_mm_prefetch2(&model.qual[l2], _MM_HINT_T0);
1125+
mm_prefetch(&model.qual[l2]);
11271126
unsigned char qm2 = pm->qmap[in[i + ++j]];
11281127
last = fqz_update_ctx(pm, &state, qm2); l3 = last;
11291128

1130-
_mm_prefetch2(&model.qual[l3], _MM_HINT_T0);
1129+
mm_prefetch(&model.qual[l3]);
11311130
unsigned char qm3 = pm->qmap[in[i + ++j]];
11321131
last = fqz_update_ctx(pm, &state, qm3); l4 = last;
11331132

1134-
_mm_prefetch2(&model.qual[l4], _MM_HINT_T0);
1133+
mm_prefetch(&model.qual[l4]);
11351134
unsigned char qm4 = pm->qmap[in[i + ++j]];
11361135
last = fqz_update_ctx(pm, &state, qm4);
11371136

@@ -1143,7 +1142,7 @@ unsigned char *compress_block_fqz2f(int vers,
11431142

11441143
while (state.p > 0) {
11451144
int l2 = last;
1146-
_mm_prefetch2(&model.qual[last], _MM_HINT_T0);
1145+
mm_prefetch(&model.qual[last]);
11471146
unsigned char qm = pm->qmap[in[i + ++j]];
11481147
last = fqz_update_ctx(pm, &state, qm);
11491148
SIMPLE_MODEL(QMAX,_encodeSymbol)(&model.qual[l2], &rc, qm);

m4/ax_with_libdeflate.m4

Lines changed: 0 additions & 96 deletions
This file was deleted.

0 commit comments

Comments
 (0)