From 9edd0def7e0024e132ed13b1d21f149b4b8dbee7 Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Mon, 1 Sep 2025 15:49:00 -0400 Subject: [PATCH 01/11] updated substitute_broadcast --- src/constructors.jl | 55 +++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 34 deletions(-) diff --git a/src/constructors.jl b/src/constructors.jl index 9bdc5758..03348756 100644 --- a/src/constructors.jl +++ b/src/constructors.jl @@ -61,48 +61,35 @@ function substitute_broadcast( configarg = (inline, u₁, u₂, v, true, threads, warncheckarg, safe) unroll_param_tup = Expr(:call, lv(:avx_config_val), :(Val{$configarg}()), staticexpr(0)) + for n ∈ 1:nargs _ciₙ = ci[n] - if _ciₙ isa Symbol - syms[n] = _ciₙ::Symbol - else - syms[n] = Symbol('%', n) - #ciₙ::Expr = _ciₙ::Expr - if _ciₙ isa Expr - ciₙ = _ciₙ - elseif _ciₙ isa GlobalRef - ciₙ = Expr(:globalref, _ciₙ.mod, _ciₙ.name) + syms[n] = Symbol('%', n) + + local rhs + if _ciₙ isa Core.SSAValue + rhs = syms[_ciₙ.id] + + elseif _ciₙ isa GlobalRef + if _ciₙ.mod === Base || _ciₙ.mod === Core + rhs = lv(_ciₙ.name) else - error("Unexpected type in ci: $(typeof(_ciₙ))") + rhs = _ciₙ.name end - ciₙargs = ciₙ.args - f = first(ciₙargs) - if ciₙ.head === :(=) - push!(lb.args, Expr(:(=), f, syms[((ciₙargs[2])::Core.SSAValue).id])) - elseif isglobalref(f, Base, :materialize!) - add_ci_call!( - lb, - lv(:vmaterialize!), - ciₙargs, - syms, - n, - unroll_param_tup, - mod - ) + + elseif _ciₙ isa Expr && _ciₙ.head === :call + f = first(_ciₙ.args) + if isglobalref(f, Base, :materialize!) + add_ci_call!(lb, lv(:vmaterialize!), _ciₙ.args, syms, n, unroll_param_tup, mod) elseif isglobalref(f, Base, :materialize) - add_ci_call!( - lb, - lv(:vmaterialize), - ciₙargs, - syms, - n, - unroll_param_tup, - mod - ) + add_ci_call!(lb, lv(:vmaterialize), _ciₙ.args, syms, n, unroll_param_tup, mod) else - add_ci_call!(lb, f, ciₙargs, syms, n) + add_ci_call!(lb, f, _ciₙ.args, syms, n) end + else + rhs = _ciₙ end + push!(lb.args, Expr(:(=), syms[n], rhs)) end ret::Expr = pop!(lb.args)::Expr if Meta.isexpr(ret, :(=), 2) From 971f30987c5d8a6bcd6b4e23fbd700bba2aeb4af Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Mon, 1 Sep 2025 16:06:36 -0400 Subject: [PATCH 02/11] added continue and bumped VB version --- Project.toml | 2 +- src/constructors.jl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index bc474cbb..b02fa7e3 100644 --- a/Project.toml +++ b/Project.toml @@ -56,5 +56,5 @@ Static = "0.8.4, 1" StaticArrayInterface = "1" ThreadingUtilities = "0.5" UnPack = "1" -VectorizationBase = "0.21.67" +VectorizationBase = "0.21.72" julia = "1.6" diff --git a/src/constructors.jl b/src/constructors.jl index 03348756..3b344c02 100644 --- a/src/constructors.jl +++ b/src/constructors.jl @@ -86,6 +86,7 @@ function substitute_broadcast( else add_ci_call!(lb, f, _ciₙ.args, syms, n) end + continue else rhs = _ciₙ end From 4befd366f8644f680edb40a5ece3ca7050591465 Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Tue, 2 Sep 2025 17:35:54 -0400 Subject: [PATCH 03/11] ForwardDiffExt: switched to NNlib activation functions --- Project.toml | 12 +++++++++--- ext/ForwardDiffExt.jl | 28 +++++++++++++++++++++++++--- test/forwarddiffext.jl | 20 +++++--------------- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/Project.toml b/Project.toml index b02fa7e3..a03d98b2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "LoopVectorization" uuid = "bdcacae8-1622-11e9-2a5c-532679323890" -authors = ["Chris Elrod "] version = "0.12.172" +authors = ["Chris Elrod "] [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" @@ -30,9 +30,10 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" +NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" [extensions] -ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"] +ForwardDiffExt = ["ChainRulesCore", "ForwardDiff", "NNlib"] SpecialFunctionsExt = "SpecialFunctions" [compat] @@ -46,6 +47,7 @@ HostCPUFeatures = "0.1.10" IfElse = "0.1" LayoutPointers = "0.1.11" LinearAlgebra = "1" +NNlib = "0.9.31" OffsetArrays = "1.4.1" PolyesterWeave = "0.1.10, 0.2" PrecompileTools = "1" @@ -57,4 +59,8 @@ StaticArrayInterface = "1" ThreadingUtilities = "0.5" UnPack = "1" VectorizationBase = "0.21.72" -julia = "1.6" +julia = "1.10" + +[extras] +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" \ No newline at end of file diff --git a/ext/ForwardDiffExt.jl b/ext/ForwardDiffExt.jl index 26227f69..30efd94a 100644 --- a/ext/ForwardDiffExt.jl +++ b/ext/ForwardDiffExt.jl @@ -1,6 +1,7 @@ module ForwardDiffExt import ForwardDiff, ChainRulesCore -using LoopVectorization, VectorizationBase, SLEEFPirates, ForwardDiff +using LoopVectorization, VectorizationBase, SLEEFPirates, ForwardDiff, NNlib +using SLEEFPirates: tanh_fast, sigmoid_fast import IfElse: ifelse using VectorizationBase: AbstractSIMD, AbstractMask, zero_offsets @@ -8,7 +9,6 @@ using VectorizationBase: AbstractSIMD, AbstractMask, zero_offsets using LoopVectorization: AbstractSIMD, AbstractStridedPointer, - relu, vmap, VectorizationBase, vmapt, @@ -140,7 +140,8 @@ end ) end end -@generated function VectorizationBase.relu( + +@generated function NNlib.relu( x::ForwardDiff.Dual{T,S,N} ) where {T,S,N} quote @@ -157,6 +158,27 @@ end end end +@generated function NNlib.leakyrelu( + x::ForwardDiff.Dual{T,S,N}, + a = 0.01 +) where {T,S,N} + quote + $(Expr(:meta, :inline)) + v = x.value + z = zero(v) + + α = convert(typeof(v), a) + cmp = v < z + r = ifelse(cmp, α * v, v) + p = x.partials + ForwardDiff.Dual{T}( + r, + ForwardDiff.Partials(Base.Cartesian.@ntuple $N n -> ifelse(cmp, α * p[n], p[n])) + ) + end +end + + @generated function _ifelse( m::Union{AbstractMask,VecUnroll{<:Any,<:Any,Bit,<:AbstractMask}}, x::ForwardDiff.Dual{TAG,V,P}, diff --git a/test/forwarddiffext.jl b/test/forwarddiffext.jl index b4b905c7..32f7e8c0 100644 --- a/test/forwarddiffext.jl +++ b/test/forwarddiffext.jl @@ -16,21 +16,6 @@ function tovec(x::ForwardDiff.Dual{T,V,N}) where {T,V,N} return ret end -if LoopVectorization.ifelse !== Base.ifelse - @inline function NNlib.leakyrelu( - x::LoopVectorization.AbstractSIMD, - a = NNlib.oftf(x, NNlib.leakyrelu_a), - ) - LoopVectorization.ifelse(x > zero(x), float(x), NNlib.oftf(x, a * x)) # max(a*x, x) is 3x slower - end - @inline function NNlib.leakyrelu( - x::ForwardDiff.Dual{<:Any,<:LoopVectorization.AbstractSIMD}, - a = NNlib.oftf(x, NNlib.leakyrelu_a), - ) - LoopVectorization.ifelse(x > zero(x), float(x), NNlib.oftf(x, a * x)) # max(a*x, x) is 3x slower - end -end - vx0 = randnvec() vx1 = randnvec() vx2 = randnvec() @@ -50,3 +35,8 @@ vud = ForwardDiff.Dual(vu0, vu1, vu2) reinterpret(Float64, NNlib.leakyrelu.(tovec(vd0))) @test reinterpret(Float64, tovec(NNlib.leakyrelu(vud))) ≈ reinterpret(Float64, NNlib.leakyrelu.(tovec(vud))) + +@test reinterpret(Float64, tovec(NNlib.relu(vd0))) ≈ + reinterpret(Float64, NNlib.relu.(tovec(vd0))) +@test reinterpret(Float64, tovec(NNlib.relu(vud))) ≈ + reinterpret(Float64, NNlib.relu.(tovec(vud))) \ No newline at end of file From 6ce5e102d15bf6dffd2917cee2afd94f63e2152d Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Tue, 2 Sep 2025 22:04:44 -0400 Subject: [PATCH 04/11] cleaned up substitute_broadcast --- src/constructors.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/constructors.jl b/src/constructors.jl index 3b344c02..3391ea2d 100644 --- a/src/constructors.jl +++ b/src/constructors.jl @@ -66,15 +66,14 @@ function substitute_broadcast( _ciₙ = ci[n] syms[n] = Symbol('%', n) - local rhs if _ciₙ isa Core.SSAValue - rhs = syms[_ciₙ.id] + push!(lb.args, Expr(:(=), syms[n], syms[_ciₙ.id])) elseif _ciₙ isa GlobalRef if _ciₙ.mod === Base || _ciₙ.mod === Core - rhs = lv(_ciₙ.name) + push!(lb.args, Expr(:(=), syms[n], lv(_ciₙ.name))) else - rhs = _ciₙ.name + push!(lb.args, Expr(:(=), syms[n], _ciₙ.name)) end elseif _ciₙ isa Expr && _ciₙ.head === :call @@ -86,16 +85,17 @@ function substitute_broadcast( else add_ci_call!(lb, f, _ciₙ.args, syms, n) end - continue + else - rhs = _ciₙ + push!(lb.args, Expr(:(=), syms[n], _ciₙ)) end - push!(lb.args, Expr(:(=), syms[n], rhs)) end + ret::Expr = pop!(lb.args)::Expr if Meta.isexpr(ret, :(=), 2) ret = (ret.args[2])::Expr end + esc(Expr(:let, lb, Expr(:block, ret))) end From f11c21abfe29cf4eaf21c31c556e44e3173d9740 Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Tue, 9 Sep 2025 16:02:13 -0400 Subject: [PATCH 05/11] docs: now builds only exported functions. removed _turbo!_ from exports. --- Project.toml | 6 +++--- docs/make.jl | 4 ++-- docs/src/api.md | 11 +++++++++++ docs/src/index.md | 2 -- src/LoopVectorization.jl | 2 +- 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/Project.toml b/Project.toml index a03d98b2..8a1d39a9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "LoopVectorization" uuid = "bdcacae8-1622-11e9-2a5c-532679323890" -version = "0.12.172" authors = ["Chris Elrod "] +version = "0.12.172" [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" @@ -29,8 +29,8 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" [weakdeps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" -SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" +SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" [extensions] ForwardDiffExt = ["ChainRulesCore", "ForwardDiff", "NNlib"] @@ -63,4 +63,4 @@ julia = "1.10" [extras] ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" -NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" \ No newline at end of file +NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" diff --git a/docs/make.jl b/docs/make.jl index b537d66f..531e73b0 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,7 +11,6 @@ makedocs(; "examples/matrix_multiplication.md", "examples/array_interface.md", "examples/matrix_vector_ops.md", - "examples/dot_product.md", "examples/datetime_arrays.md", "examples/special_functions.md", "examples/sum_of_squared_error.md", @@ -31,7 +30,8 @@ makedocs(; ], # repo="https://github.com/JuliaSIMD/LoopVectorization.jl/blob/{commit}{path}#L{line}", sitename = "LoopVectorization.jl", - authors = "Chris Elrod" + authors = "Chris Elrod", + checkdocs=:exports, # assets=[], ) diff --git a/docs/src/api.md b/docs/src/api.md index 418844d1..cceae714 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -1,5 +1,9 @@ # API reference +```@docs +LoopVectorization +``` + ## Macros ```@docs @@ -12,6 +16,8 @@ ```@docs vmap vmap! +vmapt +vmapt! vmapnt vmapnt! vmapntt @@ -27,7 +33,12 @@ LoopVectorization.vfilter! ## `reduce`-like constructs ```@docs +vsum vreduce vmapreduce ``` +## Operators +```@docs +*ˡ +``` diff --git a/docs/src/index.md b/docs/src/index.md index d74d43fe..dd424210 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -30,5 +30,3 @@ Pages = [ ] Depth = 1 ``` - - diff --git a/src/LoopVectorization.jl b/src/LoopVectorization.jl index 4af0f832..c86ca2da 100644 --- a/src/LoopVectorization.jl +++ b/src/LoopVectorization.jl @@ -25,7 +25,7 @@ if isdefined(Base, :Experimental) && @eval Base.Experimental.@max_methods 1 end export LowDimArray, - static, stridedpointer, *ˡ, _turbo_!, tanh_fast, sigmoid_fast + static, stridedpointer, *ˡ, tanh_fast, sigmoid_fast using ArrayInterface: UpTri, LoTri using Static: StaticInt, gt, static, Zero, One, reduce_tup From 4fd0f3db7e209b9d2a09eb77e95ba8a0a9091617 Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Fri, 3 Oct 2025 20:01:44 -0400 Subject: [PATCH 06/11] docs: added dot_product.md back to examples list --- docs/make.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/make.jl b/docs/make.jl index 531e73b0..96808c54 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,6 +11,7 @@ makedocs(; "examples/matrix_multiplication.md", "examples/array_interface.md", "examples/matrix_vector_ops.md", + "examples/dot_product.md", "examples/datetime_arrays.md", "examples/special_functions.md", "examples/sum_of_squared_error.md", From c46223c4030fd72a65872ce55c6c894e6962ae20 Mon Sep 17 00:00:00 2001 From: Maximilian Pochapski <67759684+mxpoch@users.noreply.github.com> Date: Tue, 7 Oct 2025 15:40:20 -0400 Subject: [PATCH 07/11] ForwardDiffExt: Constrained dispatch to AbstractSIMD types --- ext/ForwardDiffExt.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/ForwardDiffExt.jl b/ext/ForwardDiffExt.jl index 30efd94a..b07f76e5 100644 --- a/ext/ForwardDiffExt.jl +++ b/ext/ForwardDiffExt.jl @@ -142,7 +142,7 @@ end end @generated function NNlib.relu( - x::ForwardDiff.Dual{T,S,N} + x::ForwardDiff.Dual{T,<:LoopVectorization.AbstractSIMD,N} ) where {T,S,N} quote $(Expr(:meta, :inline)) @@ -159,7 +159,7 @@ end end @generated function NNlib.leakyrelu( - x::ForwardDiff.Dual{T,S,N}, + x::ForwardDiff.Dual{T,<:LoopVectorization.AbstractSIMD,N}, a = 0.01 ) where {T,S,N} quote From f07def870b679ba033bd0dfe3aceedf3cad1452e Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Wed, 8 Oct 2025 01:35:35 -0400 Subject: [PATCH 08/11] Update Project.toml --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 8a1d39a9..fe28b95b 100644 --- a/Project.toml +++ b/Project.toml @@ -33,7 +33,8 @@ NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" [extensions] -ForwardDiffExt = ["ChainRulesCore", "ForwardDiff", "NNlib"] +ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"] +ForwardDiffNNlibExt = ["ForwardDiff", "NNlib"] SpecialFunctionsExt = "SpecialFunctions" [compat] From d6e39b13e3ffcc433927b20af3599c601f67a489 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Wed, 8 Oct 2025 01:37:53 -0400 Subject: [PATCH 09/11] Create ForwardDiffNNlibExt.jl --- ext/ForwardDiffNNlibExt.jl | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 ext/ForwardDiffNNlibExt.jl diff --git a/ext/ForwardDiffNNlibExt.jl b/ext/ForwardDiffNNlibExt.jl new file mode 100644 index 00000000..3c07d5d7 --- /dev/null +++ b/ext/ForwardDiffNNlibExt.jl @@ -0,0 +1,42 @@ +module ForwardDiffNNlibExt +import ForwardDiff +using LoopVectorization, VectorizationBase, SLEEFPirates, ForwardDiff, NNlib + +@generated function NNlib.relu( + x::ForwardDiff.Dual{T,<:LoopVectorization.AbstractSIMD,N} +) where {T,S,N} + quote + $(Expr(:meta, :inline)) + v = x.value + z = zero(v) + cmp = v < z + r = ifelse(cmp, z, v) + p = x.partials + ForwardDiff.Dual{T}( + r, + ForwardDiff.Partials(Base.Cartesian.@ntuple $N n -> ifelse(cmp, z, p[n])) + ) + end +end + +@generated function NNlib.leakyrelu( + x::ForwardDiff.Dual{T,<:LoopVectorization.AbstractSIMD,N}, + a = 0.01 +) where {T,S,N} + quote + $(Expr(:meta, :inline)) + v = x.value + z = zero(v) + + α = convert(typeof(v), a) + cmp = v < z + r = ifelse(cmp, α * v, v) + p = x.partials + ForwardDiff.Dual{T}( + r, + ForwardDiff.Partials(Base.Cartesian.@ntuple $N n -> ifelse(cmp, α * p[n], p[n])) + ) + end +end + +end From 7671a4f0581ee38c258c4d9336cc056b4b53e9c9 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Wed, 8 Oct 2025 01:38:21 -0400 Subject: [PATCH 10/11] Update ForwardDiffExt.jl --- ext/ForwardDiffExt.jl | 40 +--------------------------------------- 1 file changed, 1 insertion(+), 39 deletions(-) diff --git a/ext/ForwardDiffExt.jl b/ext/ForwardDiffExt.jl index b07f76e5..5b953b7b 100644 --- a/ext/ForwardDiffExt.jl +++ b/ext/ForwardDiffExt.jl @@ -1,6 +1,6 @@ module ForwardDiffExt import ForwardDiff, ChainRulesCore -using LoopVectorization, VectorizationBase, SLEEFPirates, ForwardDiff, NNlib +using LoopVectorization, VectorizationBase, SLEEFPirates, ForwardDiff using SLEEFPirates: tanh_fast, sigmoid_fast import IfElse: ifelse @@ -141,44 +141,6 @@ end end end -@generated function NNlib.relu( - x::ForwardDiff.Dual{T,<:LoopVectorization.AbstractSIMD,N} -) where {T,S,N} - quote - $(Expr(:meta, :inline)) - v = x.value - z = zero(v) - cmp = v < z - r = ifelse(cmp, z, v) - p = x.partials - ForwardDiff.Dual{T}( - r, - ForwardDiff.Partials(Base.Cartesian.@ntuple $N n -> ifelse(cmp, z, p[n])) - ) - end -end - -@generated function NNlib.leakyrelu( - x::ForwardDiff.Dual{T,<:LoopVectorization.AbstractSIMD,N}, - a = 0.01 -) where {T,S,N} - quote - $(Expr(:meta, :inline)) - v = x.value - z = zero(v) - - α = convert(typeof(v), a) - cmp = v < z - r = ifelse(cmp, α * v, v) - p = x.partials - ForwardDiff.Dual{T}( - r, - ForwardDiff.Partials(Base.Cartesian.@ntuple $N n -> ifelse(cmp, α * p[n], p[n])) - ) - end -end - - @generated function _ifelse( m::Union{AbstractMask,VecUnroll{<:Any,<:Any,Bit,<:AbstractMask}}, x::ForwardDiff.Dual{TAG,V,P}, From db7af8ca6a737f9bd3711a77b23bdbb33caf69d4 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Wed, 8 Oct 2025 01:41:50 -0400 Subject: [PATCH 11/11] Update ForwardDiffExt.jl --- ext/ForwardDiffExt.jl | 9 --------- 1 file changed, 9 deletions(-) diff --git a/ext/ForwardDiffExt.jl b/ext/ForwardDiffExt.jl index 5b953b7b..765570ec 100644 --- a/ext/ForwardDiffExt.jl +++ b/ext/ForwardDiffExt.jl @@ -268,15 +268,6 @@ function ChainRulesCore.rrule(::typeof(sigmoid_fast), x) end s, ∂ end -function ChainRulesCore.rrule(::typeof(relu), v) - z = zero(v) - cmp = v < z - r = ifelse(cmp, z, v) - ∂ = let cmp = cmp - y -> (ChainRulesZero(), ifelse(cmp, zero(y), y)) - end - r, ∂ -end function ∂vmap_singlethread!( f::F,