diff --git a/lib/meson.build b/lib/meson.build index 44ced53..2f9f06c 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -6,7 +6,7 @@ if cpp.has_argument('-march=haswell') 'x86simdsort-avx2.cpp', ), include_directories : [src], - cpp_args : ['-march=haswell'], + cpp_args : meson.get_compiler('cpp').get_id() == 'msvc' ? ['/arch:AVX2'] : ['-march=haswell'], gnu_symbol_visibility : 'inlineshidden', dependencies: [omp_dep], ) @@ -18,7 +18,7 @@ if cpp.has_argument('-march=skylake-avx512') 'x86simdsort-skx.cpp', ), include_directories : [src], - cpp_args : ['-march=skylake-avx512'], + cpp_args : meson.get_compiler('cpp').get_id() == 'msvc' ? ['/arch:AVX512'] : ['-march=skylake-avx512'], gnu_symbol_visibility : 'inlineshidden', dependencies: [omp_dep], ) @@ -30,7 +30,7 @@ if cpp.has_argument('-march=icelake-client') 'x86simdsort-icl.cpp', ), include_directories : [src], - cpp_args : ['-march=icelake-client'], + cpp_args : meson.get_compiler('cpp').get_id() == 'msvc' ? ['/arch:AVX512'] : ['-march=icelake-client'], gnu_symbol_visibility : 'inlineshidden', dependencies: [omp_dep], ) @@ -42,7 +42,7 @@ if cancompilefp16 'x86simdsort-spr.cpp', ), include_directories : [src], - cpp_args : ['-march=sapphirerapids'], + cpp_args : meson.get_compiler('cpp').get_id() == 'msvc' ? ['/arch:AVX512'] : ['-march=sapphirerapids'], gnu_symbol_visibility : 'inlineshidden', dependencies: [omp_dep], ) diff --git a/lib/x86simdsort.cpp b/lib/x86simdsort.cpp index 8ef9aad..8e6ad8d 100644 --- a/lib/x86simdsort.cpp +++ b/lib/x86simdsort.cpp @@ -1,6 +1,12 @@ +#if defined(_MSC_VER) +# define XSS_ATTRIBUTE_CONSTRUCTOR +#else +# define XSS_ATTRIBUTE_CONSTRUCTOR __attribute__((constructor)) +#endif #include "x86simdsort.h" #include "x86simdsort-internal.h" #include "x86simdsort-scalar.h" +#include "x86simdsortcpuid.h" #include #include #include @@ -12,23 +18,23 @@ static int check_cpu_feature_support(std::string_view cpufeature) if ((cpufeature == "avx512_spr") && (!disable_avx512)) #if defined(__FLT16_MAX__) && !defined(__INTEL_LLVM_COMPILER) \ && (!defined(__clang_major__) || __clang_major__ >= 18) - return __builtin_cpu_supports("avx512f") - && __builtin_cpu_supports("avx512fp16") - && __builtin_cpu_supports("avx512vbmi2"); + return xss_cpu_supports("avx512f") + && xss_cpu_supports("avx512fp16") + && xss_cpu_supports("avx512vbmi2"); #else return 0; #endif else if ((cpufeature == "avx512_icl") && (!disable_avx512)) - return __builtin_cpu_supports("avx512f") - && __builtin_cpu_supports("avx512vbmi2") - && __builtin_cpu_supports("avx512bw") - && __builtin_cpu_supports("avx512vl"); + return xss_cpu_supports("avx512f") + && xss_cpu_supports("avx512vbmi2") + && xss_cpu_supports("avx512bw") + && xss_cpu_supports("avx512vl"); else if ((cpufeature == "avx512_skx") && (!disable_avx512)) - return __builtin_cpu_supports("avx512f") - && __builtin_cpu_supports("avx512dq") - && __builtin_cpu_supports("avx512vl"); + return xss_cpu_supports("avx512f") + && xss_cpu_supports("avx512dq") + && xss_cpu_supports("avx512vl"); else if (cpufeature == "avx2") - return __builtin_cpu_supports("avx2"); + return xss_cpu_supports("avx2"); return 0; } @@ -121,11 +127,11 @@ constexpr bool IS_TYPE_FLOAT16() /* runtime dispatch mechanism */ #define DISPATCH(func, TYPE, ISA) \ - DECLARE_INTERNAL_##func(TYPE) static __attribute__((constructor)) void \ + DECLARE_INTERNAL_##func(TYPE) static XSS_ATTRIBUTE_CONSTRUCTOR void \ CAT(CAT(resolve_, func), TYPE)(void) \ { \ CAT(CAT(internal_, func), TYPE) = &xss::scalar::func; \ - __builtin_cpu_init(); \ + xss_cpu_init(); \ std::string_view preferred_cpu = find_preferred_cpu(ISA); \ if constexpr (dispatch_requested("avx512", ISA)) { \ if (preferred_cpu.find("avx512") != std::string_view::npos) { \ @@ -248,12 +254,12 @@ DISPATCH_ALL(argselect, } #define DISPATCH_KV_FUNC(func, TYPE1, TYPE2, ISA) \ - static __attribute__((constructor)) void CAT( \ + static XSS_ATTRIBUTE_CONSTRUCTOR void CAT( \ CAT(CAT(CAT(resolve_, func), _), TYPE1), TYPE2)(void) \ { \ CAT(CAT(CAT(CAT(internal_, func), _), TYPE1), TYPE2) \ = &xss::scalar::func; \ - __builtin_cpu_init(); \ + xss_cpu_init(); \ std::string_view preferred_cpu = find_preferred_cpu(ISA); \ if constexpr (dispatch_requested("avx512", ISA)) { \ if (preferred_cpu.find("avx512") != std::string_view::npos) { \ diff --git a/lib/x86simdsort.h b/lib/x86simdsort.h index 2e47b6a..3610eae 100644 --- a/lib/x86simdsort.h +++ b/lib/x86simdsort.h @@ -6,8 +6,13 @@ #include #include -#define XSS_EXPORT_SYMBOL __attribute__((visibility("default"))) -#define XSS_HIDE_SYMBOL __attribute__((visibility("hidden"))) +#if defined(_MSC_VER) +# define XSS_EXPORT_SYMBOL __declspec(dllexport) +# define XSS_HIDE_SYMBOL +#else +# define XSS_EXPORT_SYMBOL __attribute__((visibility("default"))) +# define XSS_HIDE_SYMBOL __attribute__((visibility("hidden"))) +#endif #define UNUSED(x) (void)(x) namespace x86simdsort { diff --git a/lib/x86simdsortcpuid.h b/lib/x86simdsortcpuid.h new file mode 100644 index 0000000..bbe5096 --- /dev/null +++ b/lib/x86simdsortcpuid.h @@ -0,0 +1,41 @@ +#ifndef X86SIMDSORT_CPUID_H +#define X86SIMDSORT_CPUID_H + +#include +#include +#include + +static std::unordered_map xss_cpu_features; + +inline void xss_cpu_init() { + int cpuInfo[4] = {0}; + // Check AVX2 + __cpuid(cpuInfo, 0); + int nIds = cpuInfo[0]; + __cpuid(cpuInfo, 1); + bool osxsave = (cpuInfo[2] & (1 << 27)) != 0; + bool avx = (cpuInfo[2] & (1 << 28)) != 0; + __cpuid(cpuInfo, 7); + bool avx2 = (cpuInfo[1] & (1 << 5)) != 0; + bool avx512f = (cpuInfo[1] & (1 << 16)) != 0; + bool avx512dq = (cpuInfo[1] & (1 << 17)) != 0; + bool avx512bw = (cpuInfo[1] & (1 << 30)) != 0; + bool avx512vl = (cpuInfo[1] & (1 << 31)) != 0; + bool avx512vbmi2 = (cpuInfo[2] & (1 << 6)) != 0; + bool avx512fp16 = (cpuInfo[3] & (1 << 23)) != 0; + // Store results + xss_cpu_features["avx2"] = avx2; + xss_cpu_features["avx512f"] = avx512f; + xss_cpu_features["avx512dq"] = avx512dq; + xss_cpu_features["avx512bw"] = avx512bw; + xss_cpu_features["avx512vl"] = avx512vl; + xss_cpu_features["avx512vbmi2"] = avx512vbmi2; + xss_cpu_features["avx512fp16"] = avx512fp16; +} + +inline bool xss_cpu_supports(const char* feature) { + auto it = xss_cpu_features.find(feature); + return it != xss_cpu_features.end() && it->second; +} + +#endif // X86SIMDSORT_CPUID_H diff --git a/meson.build b/meson.build index 0b826f0..b56553a 100644 --- a/meson.build +++ b/meson.build @@ -10,33 +10,6 @@ bench = include_directories('benchmarks') utils = include_directories('utils') tests = include_directories('tests') -# Add IPP sort to benchmarks: -benchipp = false -ipplink = [] -if get_option('build_ippbench') - benchipp = true - ipplink = ['-lipps', '-lippcore'] -endif - -# Essentially '-Werror' for the sanitizers; all problems become fatal with this set -if get_option('fatal_sanitizers') - add_project_arguments([ '-fno-sanitize-recover=all' ], language: 'cpp') -endif - -# Add google vqsort to benchmarks: -benchvq = false -if get_option('build_vqsortbench') - benchvq = true -endif - -# openMP: -omp = [] -omp_dep = [] -if get_option('use_openmp') - omp = dependency('openmp', required : true) - omp_dep = declare_dependency(dependencies: omp, compile_args: ['-DXSS_USE_OPENMP']) -endif - fp16code = '''#include int main() { __m512h temp = _mm512_set1_ph(1.0f); @@ -47,27 +20,13 @@ int main() { cancompilefp16 = cpp.compiles(fp16code, args:'-march=sapphirerapids') subdir('lib') -if get_option('lib_type') == 'shared' - libsimdsort = shared_library('x86simdsortcpp', +libsimdsort = shared_library('x86simdsortcpp', 'lib/x86simdsort.cpp', include_directories : [src, utils, lib], link_with : [libtargets], - dependencies: [omp_dep], - gnu_symbol_visibility : 'inlineshidden', install : true, soversion : 1, ) -else - libsimdsort = static_library('x86simdsortcpp', - 'lib/x86simdsort.cpp', - include_directories : [src, utils, lib], - link_with : [libtargets], - dependencies: [omp_dep], - gnu_symbol_visibility : 'inlineshidden', - install : true, - pic: true, - ) -endif pkg_mod = import('pkgconfig') pkg_mod.generate(libraries : libsimdsort, @@ -76,38 +35,6 @@ pkg_mod.generate(libraries : libsimdsort, filebase : 'x86simdsortcpp', description : 'C++ template library for high performance SIMD based sorting routines.') -# Create a new dependency variable making it easy to use this as a subproject: -x86simdsortcpp_dep = declare_dependency( - include_directories: include_directories('lib'), - link_with: libsimdsort, -) - -# Build test suite if option build_tests set to true -if get_option('build_tests') - gtest_dep = dependency('gtest_main', required : true, static: false) - subdir('tests') - testexe = executable('testexe', - include_directories : [lib, utils], - dependencies : [gtest_dep, x86simdsortcpp_dep], - link_whole : [libtests], - ) - test('x86 simd sort tests', testexe) -endif - -# Build benchmarking suite if option build_benchmarks is set to true - -if get_option('build_benchmarks') - gbench_dep = dependency('benchmark', required : true, static: false) - thread_dep = dependency('threads') # libbenchmark could need pthread_create - subdir('benchmarks') - benchexe = executable('benchexe', - include_directories : [src, lib, utils, bench], - dependencies : [gbench_dep, thread_dep, x86simdsortcpp_dep], - link_args: ['-lbenchmark_main', ipplink], - link_whole : [libbench], - ) -endif - summary({ 'Can compile AVX-512 FP16 ISA': cancompilefp16, 'Build test content': get_option('build_tests'),