28 changes: 18 additions & 10 deletions src/BiotSavart/BiotSavart.jl
@@ -672,22 +672,30 @@ end

 # CPU -> CPU copy (change of vector "format")
 function _copy_output_values_on_nodes!(::CPU, op::F, vs::AllFilamentVelocities, vs_h::AbstractVector, ignored = nothing) where {F}
-    n = 0
-    @inbounds for vf in vs, j in eachindex(vf)
-        q = vs_h[n += 1]
-        vf[j] = @inline op(vf[j], q) # typically op == +, meaning that we add to the previous value
+    @sync for chunk in FilamentChunkIterator(vs)
+        Threads.@spawn for (i, inds, n) in chunk
+            vf = vs[i]
+            @inbounds for j in inds
+                q = vs_h[n += 1]
+                vf[j] = @inline op(vf[j], q) # typically op == +, meaning that we add to the previous value
+            end
+        end
     end
     vs
 end
 
 # Same as above, but for a single component `i`.
 function _copy_output_values_on_nodes!(::CPU, op::F, vs::AllFilamentVelocities, i::Int, vs_h::HostVector{T}, ignored = nothing) where {F, T}
-    n = 0
-    @inbounds for vf in vs, j in eachindex(vf)
-        q = vs_h[n += 1]::T
-        v = vf[j]
-        vi_new = @inline op(v[i], q) # typically op == +, meaning that we add to the previous value
-        vf[j] = Base.setindex(v, vi_new, i)
+    @sync for chunk in FilamentChunkIterator(vs)
+        Threads.@spawn for (filament_idx, inds, n) in chunk
+            vf = vs[filament_idx]
+            @inbounds for j in inds
+                q = vs_h[n += 1]::T
+                v = vf[j]
+                vi_new = @inline op(v[i], q) # typically op == +, meaning that we add to the previous value
+                vf[j] = Base.setindex(v, vi_new, i)
+            end
+        end
     end
     vs
 end
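Both methods previously walked all nodes in a single serial loop, tracking a running index `n` into the flat host vector `vs_h`. The new versions split the work by filament: `FilamentChunkIterator` (not defined in this diff) hands each `Threads.@spawn` task a chunk of `(filament_index, node_indices, offset)` entries, where `offset` is the number of nodes preceding that filament in the flattened layout, so each task can index `vs_h` independently. A minimal sketch of the contract these loops appear to assume; the name `filament_chunks` and the chunking granularity (whole filaments per entry) are assumptions, not the package's actual implementation:

using Base.Threads: nthreads

# Hypothetical stand-in for the FilamentChunkIterator contract assumed above:
# partition the filaments into ~nchunks contiguous groups, where each entry is
# (filament_index, node_indices, offset) and `offset` counts the nodes that
# precede the filament in the flattened layout of `vs_h`.
function filament_chunks(vs; nchunks = nthreads())
    lens = map(length, vs)
    offsets = cumsum(lens) .- lens  # exclusive prefix sum: nodes before each filament
    entries = [(i, eachindex(vs[i]), offsets[i]) for i in eachindex(vs)]
    # Each chunk is then consumed by one task, e.g. via Threads.@spawn.
    Iterators.partition(entries, max(1, cld(length(entries), nchunks)))
end

Since every entry carries its own starting offset, the `vs_h[n += 1]` pattern inside a task touches a disjoint slice of `vs_h` and never races with other tasks.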
28 changes: 25 additions & 3 deletions src/BiotSavart/host_device_transfers.jl
@@ -92,24 +92,44 @@ end
 # package extension.
 copyto_ptr!(dst, src, n) = unsafe_copyto!(pointer(dst), pointer(src), n)
 
-# TODO: parallelise copy when src is on the CPU?
 function Base.copyto!(v::HostVector{T}, src::DenseArray{T}) where {T}
     n = length(src)
     @assert length(v) == n # already resized
     # This should work both when src is either a CPU or a GPU array.
     GC.@preserve src v begin
         copyto_ptr!(v, src, n)
     end
     v
 end
 
+# Specialisation for CPU -> CPU case
+function Base.copyto!(v::HostVector{T}, src::Array{T}) where {T}
+    n = length(src)
+    @assert length(v) == n # already resized
+    Threads.@threads for i in eachindex(src, v)
+        @inbounds v[i] = src[i]
+    end
+    v
+end
+
-# TODO: parallelise copy when dst is on the CPU?
 function Base.copyto!(dst::DenseArray{T}, v::HostVector{T}) where {T}
     n = length(v)
     @assert length(dst) == n # already resized
     # This should work both when src is either a CPU or a GPU array.
     GC.@preserve dst v begin
         copyto_ptr!(dst, v, n)
     end
     dst
 end
 
+# Specialisation for CPU -> CPU case
+function Base.copyto!(dst::Array{T}, v::HostVector{T}) where {T}
+    n = length(v)
+    @assert length(dst) == n # already resized
+    Threads.@threads for i in eachindex(dst, v)
+        @inbounds dst[i] = v[i]
+    end
+    dst
+end
 
 ## ========================================================================================== ##
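The generic methods funnel through `unsafe_copyto!` on raw pointers, a single-threaded memcpy that also covers GPU arrays (via the package extension mentioned above); the two `-# TODO` lines marked this as a known serial bottleneck. The new `Array` specialisations resolve those TODOs by spreading the copy over threads. A standalone sketch of the pattern they use (illustrative only; `threaded_copy!` is a made-up name, not package API):

using Base.Threads: @threads

# Threaded elementwise copy, as in the new specialisations above.
# eachindex(dst, src) throws a DimensionMismatch if the arrays' indices
# disagree, which is what makes @inbounds safe inside the loop.
function threaded_copy!(dst::Array{T}, src::Array{T}) where {T}
    @threads for i in eachindex(dst, src)
        @inbounds dst[i] = src[i]
    end
    dst
end

src = rand(10^7)
dst = similar(src)
threaded_copy!(dst, src)
@assert dst == src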
@@ -128,7 +148,9 @@ end
 function copy_host_to_device!(dst::Vector, src::Vector, ::HostVector)
     n = length(src)
     resize_no_copy!(dst, n)
-    copyto!(dst, src)
+    Threads.@threads for i in eachindex(dst, src)
+        @inbounds dst[i] = src[i]
+    end
     dst
 end

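The last hunk applies the same idea to the `Vector` to `Vector` case of `copy_host_to_device!`: since neither argument is a `HostVector`, the new `copyto!` specialisations above do not dispatch here, so the threaded loop is written out inline in place of the previous serial `copyto!`. A rough way to check the benefit on a given machine (hypothetical harness; BenchmarkTools and the `threaded_copy!` sketch above are assumed, and sizes are illustrative):

using BenchmarkTools
using Base.Threads: nthreads

src = rand(Float64, 10^8)
dst = similar(src)

println("threads: ", nthreads())
@btime copyto!($dst, $src)          # serial memcpy baseline
@btime threaded_copy!($dst, $src)   # threaded loop (sketch above)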