@@ -4,6 +4,13 @@ using LoopVectorization.VectorizationBase: REGISTER_SIZE
4
4
# const LOOPVECBENCHDIR = joinpath(pkgdir(LoopVectorization), "benchmark")
5
5
include (joinpath (LOOPVECBENCHDIR, " looptests.jl" ))
6
6
7
# Paths of the benchmark shared libraries, all located in LOOPVECBENCHDIR.
# They are defined ahead of the build steps further down, which test for these
# files with `isfile`/`mtime` and pass them to the compilers as output targets.
const LIBCTEST = joinpath(LOOPVECBENCHDIR, "libctests.so")
const LIBFTEST = joinpath(LOOPVECBENCHDIR, "libftests.so")
const LIBICTEST = joinpath(LOOPVECBENCHDIR, "libictests.so")
const LIBIFTEST = joinpath(LOOPVECBENCHDIR, "libiftests.so")
const LIBEIGENTEST = joinpath(LOOPVECBENCHDIR, "libetest.so")
const LIBIEIGENTEST = joinpath(LOOPVECBENCHDIR, "libietest.so")
13
+
7
14
8
15
# requires Clang with polly to build
9
16
cfile = joinpath (LOOPVECBENCHDIR, " looptests.c" )
28
35
eigenfile = joinpath (LOOPVECBENCHDIR, " looptestseigen.cpp" )
29
36
# (Re)build the Eigen benchmark library with g++ when the library is missing or
# older than its source file.
if !isfile(LIBEIGENTEST) || mtime(eigenfile) > mtime(LIBEIGENTEST)
    # Clang seems to have trouble finding includes
    # Only define EIGEN_VECTORIZE_AVX512 when VectorizationBase reports AVX512F.
    # Interpolating an empty vector into a command contributes no argument, so a
    # single command line serves both cases.
    let eigen_defines = if LoopVectorization.VectorizationBase.AVX512F
            ["-DEIGEN_VECTORIZE_AVX512"]
        else
            String[]
        end
        # NOTE(review): REGISTER_SIZE is presumably in bytes, hence the factor of 8
        # to obtain the bit width passed to -mprefer-vector-width; confirm.
        run(`g++ -O3 -march=native -mprefer-vector-width=$(8REGISTER_SIZE) $eigen_defines -I/usr/include/eigen3 -shared -fPIC $eigenfile -o $LIBEIGENTEST`)
    end
end
34
44
# (Re)build the second Eigen benchmark library, compiled with clang++, when the
# library is missing or older than its source file.
if !isfile(LIBIEIGENTEST) || mtime(eigenfile) > mtime(LIBIEIGENTEST)
    # Variant with explicit libstdc++ include paths:
    # run(`/usr/bin/clang++ -Ofast -march=native -mprefer-vector-width=$(8REGISTER_SIZE) -DEIGEN_VECTORIZE_AVX512 -I/usr/include/c++/9 -I/usr/include/c++/9/x86_64-generic-linux -I/usr/include/eigen3 -shared -fPIC $eigenfile -o $LIBIEIGENTEST`)
    # Only define EIGEN_VECTORIZE_AVX512 when VectorizationBase reports AVX512F.
    # Interpolating an empty vector into a command contributes no argument.
    let eigen_defines = if LoopVectorization.VectorizationBase.AVX512F
            ["-DEIGEN_VECTORIZE_AVX512"]
        else
            String[]
        end
        # The output must be LIBIEIGENTEST, the file whose existence and mtime are
        # checked above. Writing to LIBEIGENTEST would overwrite the g++ build,
        # never create this library, and make this step rerun on every load.
        run(`/usr/bin/clang++ -Ofast -march=native -mprefer-vector-width=$(8REGISTER_SIZE) $eigen_defines -I/usr/include/eigen3 -shared -fPIC $eigenfile -o $LIBIEIGENTEST`)
    end
    # run(`icpc -fast -qopt-zmm-usage=high -fargument-noalias-global -qoverride-limits -I/usr/include/eigen3 -shared -fPIC $eigenfile -o $LIBIEIGENTEST`)
end
39
53
47
61
# end
48
62
49
63
50
- const LIBCTEST = joinpath (LOOPVECBENCHDIR, " libctests.so" )
51
- const LIBFTEST = joinpath (LOOPVECBENCHDIR, " libftests.so" )
52
- const LIBICTEST = joinpath (LOOPVECBENCHDIR, " libictests.so" )
53
- const LIBIFTEST = joinpath (LOOPVECBENCHDIR, " libiftests.so" )
54
- const LIBEIGENTEST = joinpath (LOOPVECBENCHDIR, " libetest.so" )
55
- const LIBIEIGENTEST = joinpath (LOOPVECBENCHDIR, " libietest.so" )
56
-
57
64
using MKL_jll, OpenBLAS_jll
58
65
59
66
const libMKL = Libdl. dlopen (MKL_jll. libmkl_rt)
@@ -105,9 +112,9 @@ function dgemmopenblas!(C::AbstractMatrix{Float64}, A::AbstractMatrix{Float64},
105
112
transA, transB, M, N, K, α, pA, ldA, pB, ldB, β, C, ldC
106
113
)
107
114
end
108
"""
    mkl_set_num_threads(N::Integer)

Call the routine referenced by `MKL_SET_NUM_THREADS`, passing `N` by value as a
32-bit signed integer.

`N` is reduced with `%`, so a value outside the `Int32` range wraps around
instead of throwing. Returns `nothing`.
"""
function mkl_set_num_threads(N::Integer)
    nthreads = N % Int32
    return ccall(MKL_SET_NUM_THREADS, Cvoid, (Int32,), nthreads)
end
109
116
mkl_set_num_threads (1 )
110
"""
    openblas_set_num_threads(N::Integer)

Call the routine referenced by `OPENBLAS_SET_NUM_THREADS`, passing `N` by value.

The argument is declared as `Cint` to match the C prototype
`void openblas_set_num_threads(int)`; `ccall` converts `N` and throws an
`InexactError` if it does not fit. Returns `nothing`.
"""
function openblas_set_num_threads(N::Integer)
    # NOTE(review): assumes OPENBLAS_SET_NUM_THREADS points at OpenBLAS's
    # `openblas_set_num_threads` (or its suffixed ILP64 twin), which takes a C
    # `int` even in 64-bit-index builds; confirm against the dlsym call.
    return ccall(OPENBLAS_SET_NUM_THREADS, Cvoid, (Cint,), N)
end
111
118
openblas_set_num_threads (1 )
112
119
function dgemvmkl! (y:: AbstractVector{Float64} , A:: AbstractMatrix{Float64} , x:: AbstractVector{Float64} )
113
120
transA = istransposed (A)
@@ -123,7 +130,7 @@ function dgemvmkl!(y::AbstractVector{Float64}, A::AbstractMatrix{Float64}, x::Ab
123
130
ccall (
124
131
DGEMV_MKL, Cvoid,
125
132
(Ref{UInt8}, Ref{Int32}, Ref{Int32}, Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Float64}, Ref{Int32}, Ref{Float64}, Ref{Float64}, Ref{Int32}),
126
- transA, M32, N32, α, A , ldA, x, incx, β, y, incy
133
+ transA, M32, N32, α, pA , ldA, x, incx, β, y, incy
127
134
)
128
135
end
129
136
function dgemvopenblas! (y:: AbstractVector{Float64} , A:: AbstractMatrix{Float64} , x:: AbstractVector{Float64} )
@@ -138,7 +145,7 @@ function dgemvopenblas!(y::AbstractVector{Float64}, A::AbstractMatrix{Float64},
138
145
ccall (
139
146
DGEMV_OpenBLAS, Cvoid,
140
147
(Ref{UInt8}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}, Ref{Int64}, Ref{Float64}, Ref{Int64}, Ref{Float64}, Ref{Float64}, Ref{Int64}),
141
- transA, M, N, α, A , ldA, x, incx, β, y, incy
148
+ transA, M, N, α, pA , ldA, x, incx, β, y, incy
142
149
)
143
150
end
144
151
0 commit comments