|
1 | 1 | module LoopVectorization
|
2 | 2 |
|
3 |
| -if (!isnothing(get(ENV, "TRAVIS_BRANCH", nothing)) || !isnothing(get(ENV, "APPVEYOR", nothing))) && isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@optlevel")) |
4 |
| - @eval Base.Experimental.@optlevel 1 |
5 |
| -end |
| 3 | +# if (!isnothing(get(ENV, "TRAVIS_BRANCH", nothing)) || !isnothing(get(ENV, "APPVEYOR", nothing))) && isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@optlevel")) |
| 4 | + # @eval Base.Experimental.@optlevel 1 |
| 5 | +# end |
6 | 6 |
|
7 |
| -using VectorizationBase, SIMDPirates, SLEEFPirates, UnPack, OffsetArrays |
8 |
| -using VectorizationBase: REGISTER_SIZE, extract_data, num_vector_load_expr, |
9 |
| - mask, masktable, pick_vector_width_val, valmul, valrem, valmuladd, valmulsub, valadd, valsub, _MM, |
10 |
| - maybestaticlength, maybestaticsize, staticm1, staticp1, staticmul, subsetview, vzero, stridedpointer_for_broadcast, |
11 |
| - Static, Zero, StaticUnitRange, StaticLowerUnitRange, StaticUpperUnitRange, unwrap, maybestaticrange, |
12 |
| - AbstractColumnMajorStridedPointer, AbstractRowMajorStridedPointer, AbstractSparseStridedPointer, AbstractStaticStridedPointer, |
13 |
| - PackedStridedPointer, SparseStridedPointer, RowMajorStridedPointer, StaticStridedPointer, StaticStridedStruct, offsetprecalc, |
14 |
| - maybestaticfirst, maybestaticlast, scalar_less, scalar_greater, noalias!, gesp, gepbyte, pointerforcomparison, NativeTypes, staticmul, staticmuladd |
15 |
| -using SIMDPirates: VECTOR_SYMBOLS, evadd, evsub, evmul, evfdiv, vrange, |
16 |
| - reduced_add, reduced_prod, reduce_to_add, reduced_max, reduced_min, vsum, vprod, vmaximum, vminimum, |
17 |
| - sizeequivalentfloat, sizeequivalentint, vadd!, vsub!, vmul!, vfdiv!, vfmadd!, vfnmadd!, vfmsub!, vfnmsub!, |
18 |
| - vfmadd231, vfmsub231, vfnmadd231, vfnmsub231, sizeequivalentfloat, sizeequivalentint, #prefetch, |
19 |
| - vmullog2, vmullog10, vdivlog2, vdivlog10, vmullog2add!, vmullog10add!, vdivlog2add!, vdivlog10add!, vfmaddaddone, vadd1, relu |
| 7 | +using VectorizationBase, SLEEFPirates, UnPack, OffsetArrays |
| 8 | +using VectorizationBase: REGISTER_SIZE, data, |
| 9 | + mask, pick_vector_width_val, MM, |
| 10 | + maybestaticlength, maybestaticsize, staticm1, staticp1, staticmul, vzero, |
| 11 | + Zero, maybestaticrange, offsetprecalc, |
| 12 | + maybestaticfirst, maybestaticlast, scalar_less, gesp, pointerforcomparison, NativeTypes, staticmul, |
| 13 | + relu |
| 14 | +using IfElse: ifelse |
| 15 | + |
| 16 | +const Static = StaticInt |
| 17 | +# missing: subsetview, stridedpointer_for_broadcast, unwrap, StaticUnitRange, stridedpointers, noalias!, gepbyte, |
| 18 | +# using SIMDPirates: VECTOR_SYMBOLS, evadd, evsub, evmul, evfdiv, vrange, |
| 19 | +# reduced_add, reduced_prod, reduce_to_add, reduced_max, reduced_min, vsum, vprod, vmaximum, vminimum, |
| 20 | +# sizeequivalentfloat, sizeequivalentint, vadd!, vsub!, vmul!, vfdiv!, vfmadd!, vfnmadd!, vfmsub!, vfnmsub!, |
| 21 | +# vfmadd231, vfmsub231, vfnmadd231, vfnmsub231, sizeequivalentfloat, sizeequivalentint, #prefetch, |
| 22 | +# vmullog2, vmullog10, vdivlog2, vdivlog10, vmullog2add!, vmullog10add!, vdivlog2add!, vdivlog10add!, vfmaddaddone, vadd1, relu |
20 | 23 | using SLEEFPirates: pow
|
21 | 24 | using Base.Broadcast: Broadcasted, DefaultArrayStyle
|
22 | 25 | using LinearAlgebra: Adjoint, Transpose
|
@@ -46,7 +49,7 @@ If you want good performance, DO NOT use a 32-bit build of Julia if you don't ha
|
46 | 49 | const REGISTER_COUNT = Sys.ARCH === :i686 ? 8 : VectorizationBase.REGISTER_COUNT
|
47 | 50 |
|
48 | 51 | include("getconstindexes.jl")
|
49 |
| -include("vectorizationbase_extensions.jl") |
| 52 | +# include("vectorizationbase_extensions.jl") |
50 | 53 | include("predicates.jl")
|
51 | 54 | include("map.jl")
|
52 | 55 | include("filter.jl")
|
@@ -89,7 +92,7 @@ loop-reordering so as to improve performance:
|
89 | 92 | """
|
90 | 93 | LoopVectorization
|
91 | 94 |
|
92 |
| -include("precompile.jl") |
93 |
| -_precompile_() |
| 95 | +# include("precompile.jl") |
| 96 | +# _precompile_() |
94 | 97 |
|
95 | 98 | end # module
|
0 commit comments