diff --git a/README.md b/README.md index 6768314..61f4de5 100644 --- a/README.md +++ b/README.md @@ -80,16 +80,6 @@ benchmark](https://github.com/google/benchmark) frameworks respectively. You can configure meson to build them both by using `-Dbuild_tests=true` and `-Dbuild_benchmarks=true`. -### Note about building with avx512 by g++ v9 and v10 - -There is a risk when compile with avx512 by g++ v9 and v10, -as some `MMX Technology` instructions is used by g++ v9/v10 -without clearing fpu state. -Check [issue 154](https://github.com/intel/x86-simd-sort/issues/154) -for more details. - -Adding `g++` option `-mno-mmx`, which disables `MMX Technology` instructions, is a possible workaround. - ## Example usage #### Sort an array of floats diff --git a/src/xss-common-argsort.h b/src/xss-common-argsort.h index cf02b30..eb46bbe 100644 --- a/src/xss-common-argsort.h +++ b/src/xss-common-argsort.h @@ -575,6 +575,11 @@ X86_SIMD_SORT_INLINE void xss_argsort(T *arr, if (descending) { std::reverse(arg, arg + arrsize); } } + +#ifdef __MMX__ + // Workaround for a g++ v9/v10 bug that emits MMX instructions without a trailing EMMS: clear the x87 FPU state before returning (see intel/x86-simd-sort issue 154) + _mm_empty(); +#endif } template @@ -632,6 +637,11 @@ X86_SIMD_SORT_INLINE void xss_argselect(T *arr, argselect_( arr, arg, k, 0, arrsize - 1, 2 * (arrsize_t)log2(arrsize)); } + +#ifdef __MMX__ + // Workaround for a g++ v9/v10 bug that emits MMX instructions without a trailing EMMS: clear the x87 FPU state before returning (see intel/x86-simd-sort issue 154) + _mm_empty(); +#endif } template