Skip to content
11 changes: 9 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
[weakdeps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"

[extensions]
ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"]
ForwardDiffNNlibExt = ["ForwardDiff", "NNlib"]
SpecialFunctionsExt = "SpecialFunctions"

[compat]
Expand All @@ -46,6 +48,7 @@ HostCPUFeatures = "0.1.10"
IfElse = "0.1"
LayoutPointers = "0.1.11"
LinearAlgebra = "1"
NNlib = "0.9.31"
OffsetArrays = "1.4.1"
PolyesterWeave = "0.1.10, 0.2"
PrecompileTools = "1"
Expand All @@ -56,5 +59,9 @@ Static = "0.8.4, 1"
StaticArrayInterface = "1"
ThreadingUtilities = "0.5"
UnPack = "1"
VectorizationBase = "0.21.67"
julia = "1.6"
VectorizationBase = "0.21.72"
julia = "1.10"

[extras]
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
3 changes: 2 additions & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ makedocs(;
],
# repo="https://github.com/JuliaSIMD/LoopVectorization.jl/blob/{commit}{path}#L{line}",
sitename = "LoopVectorization.jl",
authors = "Chris Elrod"
authors = "Chris Elrod",
checkdocs=:exports,
# assets=[],
)

Expand Down
11 changes: 11 additions & 0 deletions docs/src/api.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# API reference

```@docs
LoopVectorization
```

## Macros

```@docs
Expand All @@ -12,6 +16,8 @@
```@docs
vmap
vmap!
vmapt
vmapt!
vmapnt
vmapnt!
vmapntt
Expand All @@ -27,7 +33,12 @@ LoopVectorization.vfilter!

## `reduce`-like constructs
```@docs
vsum
vreduce
vmapreduce
```

## Operators
```@docs
```
2 changes: 0 additions & 2 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,3 @@ Pages = [
]
Depth = 1
```


27 changes: 1 addition & 26 deletions ext/ForwardDiffExt.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
module ForwardDiffExt
import ForwardDiff, ChainRulesCore
using LoopVectorization, VectorizationBase, SLEEFPirates, ForwardDiff
using SLEEFPirates: tanh_fast, sigmoid_fast

import IfElse: ifelse
using VectorizationBase: AbstractSIMD, AbstractMask, zero_offsets

using LoopVectorization:
AbstractSIMD,
AbstractStridedPointer,
relu,
vmap,
VectorizationBase,
vmapt,
Expand Down Expand Up @@ -140,22 +140,6 @@ end
)
end
end
# `relu` for a `ForwardDiff.Dual`: the value is replaced by zero where it is
# negative, and every one of the `N` partials is replaced by zero under that
# same comparison. The result is rebuilt as a `Dual` with the same tag `T`.
@generated function VectorizationBase.relu(
  x::ForwardDiff.Dual{T,S,N}
) where {T,S,N}
  quote
    $(Expr(:meta, :inline))
    primal = x.value
    nil = zero(primal)
    isneg = primal < nil
    out = ifelse(isneg, nil, primal)
    tangents = x.partials
    # `@ntuple` is unrolled over the partial count `N` interpolated by the generator.
    clamped = Base.Cartesian.@ntuple $N k -> ifelse(isneg, nil, tangents[k])
    ForwardDiff.Dual{T}(out, ForwardDiff.Partials(clamped))
  end
end

@generated function _ifelse(
m::Union{AbstractMask,VecUnroll{<:Any,<:Any,Bit,<:AbstractMask}},
Expand Down Expand Up @@ -284,15 +268,6 @@ function ChainRulesCore.rrule(::typeof(sigmoid_fast), x)
end
s, ∂
end
# Reverse-mode rule for `relu`.
#
# Returns the tuple `(r, ∂)` where `r` is `v` with negative entries replaced by
# zero, and `∂` is the pullback: given an incoming cotangent `y` it returns
# `(ChainRulesZero(), ȳ)`, with `ȳ` equal to `y` except zeroed wherever `v < 0`.
# NOTE(review): `ChainRulesZero` is not defined in the visible part of this
# file — confirm it resolves to the intended "no tangent" marker.
function ChainRulesCore.rrule(::typeof(relu), v)
  z = zero(v)
  cmp = v < z
  r = ifelse(cmp, z, v)
  # Rebind `cmp` in a `let` so the closure captures this exact binding.
  ∂ = let cmp = cmp
    y -> (ChainRulesZero(), ifelse(cmp, zero(y), y))
  end
  r, ∂
end

function ∂vmap_singlethread!(
f::F,
Expand Down
42 changes: 42 additions & 0 deletions ext/ForwardDiffNNlibExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Package extension providing `NNlib.relu` and `NNlib.leakyrelu` methods for
# `ForwardDiff.Dual` numbers whose value type is a LoopVectorization
# `AbstractSIMD`.
module ForwardDiffNNlibExt
import ForwardDiff
using LoopVectorization, VectorizationBase, SLEEFPirates, ForwardDiff, NNlib

# `relu` on a SIMD-valued Dual: the value is replaced by zero where it is
# negative, and each of the `N` partials is zeroed under the same comparison.
# The `where` clause lists only the parameters that occur in the signature;
# an extra, unused type variable would be unbound.
@generated function NNlib.relu(
  x::ForwardDiff.Dual{T,<:LoopVectorization.AbstractSIMD,N}
) where {T,N}
  quote
    $(Expr(:meta, :inline))
    v = x.value
    z = zero(v)
    cmp = v < z
    r = ifelse(cmp, z, v)
    p = x.partials
    ForwardDiff.Dual{T}(
      r,
      ForwardDiff.Partials(Base.Cartesian.@ntuple $N n -> ifelse(cmp, z, p[n]))
    )
  end
end

# `leakyrelu` on a SIMD-valued Dual with slope `a` (default `0.01`): where the
# value is negative, both the value and each of the `N` partials are multiplied
# by `a`; elsewhere they pass through unchanged.
@generated function NNlib.leakyrelu(
  x::ForwardDiff.Dual{T,<:LoopVectorization.AbstractSIMD,N},
  a = 0.01
) where {T,N}
  quote
    $(Expr(:meta, :inline))
    v = x.value
    z = zero(v)

    # Convert the slope to the type of the value before multiplying.
    α = convert(typeof(v), a)
    cmp = v < z
    r = ifelse(cmp, α * v, v)
    p = x.partials
    ForwardDiff.Dual{T}(
      r,
      ForwardDiff.Partials(Base.Cartesian.@ntuple $N n -> ifelse(cmp, α * p[n], p[n]))
    )
  end
end

end
2 changes: 1 addition & 1 deletion src/LoopVectorization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ if isdefined(Base, :Experimental) &&
@eval Base.Experimental.@max_methods 1
end
export LowDimArray,
static, stridedpointer, *ˡ, _turbo_!, tanh_fast, sigmoid_fast
static, stridedpointer, *ˡ, tanh_fast, sigmoid_fast

using ArrayInterface: UpTri, LoTri
using Static: StaticInt, gt, static, Zero, One, reduce_tup
Expand Down
56 changes: 22 additions & 34 deletions src/constructors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,53 +61,41 @@ function substitute_broadcast(
configarg = (inline, u₁, u₂, v, true, threads, warncheckarg, safe)
unroll_param_tup =
Expr(:call, lv(:avx_config_val), :(Val{$configarg}()), staticexpr(0))

for n 1:nargs
_ciₙ = ci[n]
if _ciₙ isa Symbol
syms[n] = _ciₙ::Symbol
else
syms[n] = Symbol('%', n)
#ciₙ::Expr = _ciₙ::Expr
if _ciₙ isa Expr
ciₙ = _ciₙ
elseif _ciₙ isa GlobalRef
ciₙ = Expr(:globalref, _ciₙ.mod, _ciₙ.name)
syms[n] = Symbol('%', n)

if _ciₙ isa Core.SSAValue
push!(lb.args, Expr(:(=), syms[n], syms[_ciₙ.id]))

elseif _ciₙ isa GlobalRef
if _ciₙ.mod === Base || _ciₙ.mod === Core
push!(lb.args, Expr(:(=), syms[n], lv(_ciₙ.name)))
else
error("Unexpected type in ci: $(typeof(_ciₙ))")
push!(lb.args, Expr(:(=), syms[n], _ciₙ.name))
end
ciₙargs = ciₙ.args
f = first(ciₙargs)
if ciₙ.head === :(=)
push!(lb.args, Expr(:(=), f, syms[((ciₙargs[2])::Core.SSAValue).id]))
elseif isglobalref(f, Base, :materialize!)
add_ci_call!(
lb,
lv(:vmaterialize!),
ciₙargs,
syms,
n,
unroll_param_tup,
mod
)

elseif _ciₙ isa Expr && _ciₙ.head === :call
f = first(_ciₙ.args)
if isglobalref(f, Base, :materialize!)
add_ci_call!(lb, lv(:vmaterialize!), _ciₙ.args, syms, n, unroll_param_tup, mod)
elseif isglobalref(f, Base, :materialize)
add_ci_call!(
lb,
lv(:vmaterialize),
ciₙargs,
syms,
n,
unroll_param_tup,
mod
)
add_ci_call!(lb, lv(:vmaterialize), _ciₙ.args, syms, n, unroll_param_tup, mod)
else
add_ci_call!(lb, f, ciₙargs, syms, n)
add_ci_call!(lb, f, _ciₙ.args, syms, n)
end

else
push!(lb.args, Expr(:(=), syms[n], _ciₙ))
end
end

ret::Expr = pop!(lb.args)::Expr
if Meta.isexpr(ret, :(=), 2)
ret = (ret.args[2])::Expr
end

esc(Expr(:let, lb, Expr(:block, ret)))
end

Expand Down
20 changes: 5 additions & 15 deletions test/forwarddiffext.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,6 @@ function tovec(x::ForwardDiff.Dual{T,V,N}) where {T,V,N}
return ret
end

# These test-local `leakyrelu` methods are defined only when
# `LoopVectorization.ifelse` is not the same function as `Base.ifelse`.
# Both select with `LoopVectorization.ifelse`: `float(x)` where `x` is
# positive, otherwise `a * x` passed through `NNlib.oftf`.
if LoopVectorization.ifelse !== Base.ifelse
  # Plain SIMD argument.
  @inline function NNlib.leakyrelu(
    x::LoopVectorization.AbstractSIMD,
    a = NNlib.oftf(x, NNlib.leakyrelu_a),
  )
    ispos = x > zero(x)
    kept = float(x)
    scaled = NNlib.oftf(x, a * x)
    # max(a*x, x) is 3x slower
    return LoopVectorization.ifelse(ispos, kept, scaled)
  end
  # Dual number whose value type is SIMD.
  @inline function NNlib.leakyrelu(
    x::ForwardDiff.Dual{<:Any,<:LoopVectorization.AbstractSIMD},
    a = NNlib.oftf(x, NNlib.leakyrelu_a),
  )
    ispos = x > zero(x)
    kept = float(x)
    scaled = NNlib.oftf(x, a * x)
    # max(a*x, x) is 3x slower
    return LoopVectorization.ifelse(ispos, kept, scaled)
  end
end

vx0 = randnvec()
vx1 = randnvec()
vx2 = randnvec()
Expand All @@ -50,3 +35,8 @@ vud = ForwardDiff.Dual(vu0, vu1, vu2)
reinterpret(Float64, NNlib.leakyrelu.(tovec(vd0)))
# Each check compares two routes to the same numbers: apply the activation to
# the SIMD-valued Dual and then `tovec` it, versus `tovec` first and broadcast
# the activation over the result. The two sides are joined with `≈`; without
# an operator `@test` would receive a bare array rather than a Bool.
@test reinterpret(Float64, tovec(NNlib.leakyrelu(vud))) ≈
      reinterpret(Float64, NNlib.leakyrelu.(tovec(vud)))

@test reinterpret(Float64, tovec(NNlib.relu(vd0))) ≈
      reinterpret(Float64, NNlib.relu.(tovec(vd0)))
@test reinterpret(Float64, tovec(NNlib.relu(vud))) ≈
      reinterpret(Float64, NNlib.relu.(tovec(vud)))
Loading