Skip to content

Commit 76b7805

Browse files
committed
Hand-crafted copyto! specialization
1 parent 70caeb9 commit 76b7805

File tree

2 files changed

+25
-1
lines changed

2 files changed

+25
-1
lines changed

src/auxiliary/auxiliary.jl

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,27 @@ _interleave(::Tuple{}, ::Tuple{}) = ()
5555
# Recursive step of `_interleave`: emit the leading element of `a`, then the leading
# element of `b`, followed by the interleaving of whatever remains of both tuples.
# Both tuples must have the same length `N`; the recursion ends at the separate
# method for two empty tuples.
function _interleave(a::NTuple{N}, b::NTuple{N}) where {N}
    heads = (first(a), first(b))
    rest = _interleave(tail(a), tail(b))
    return (heads..., rest...)
end
58+
59+
# Low-overhead implementation of `copyto!` used in indexmanipulations: avoids the
# overhead of Strided.jl.
#
# Copies the elements of the two-dimensional view `B`, traversed with its first
# dimension as the fastest-running index, into successive elements of the
# one-dimensional view `A`, and returns `A`. The loop order is chosen for the case
# `stride(B, 1) < stride(B, 2)`; other stride orderings are handled correctly but
# access memory less favourably.
#
# Throws a `DimensionMismatch` if `A` and `B` do not have the same number of elements.
#
# NOTE(review): the data is read from and written to `parent(A)` / `parent(B)`
# directly, so any element-wise operation attached to the views (e.g. a lazy
# conjugation) is presumably not applied -- confirm callers only pass plain views.
function _copyto!(A::StridedView{<:Any,1}, B::StridedView{<:Any,2})
    length(A) == length(B) ||
        throw(DimensionMismatch("length of A ($(length(A))) does not match " *
                                "length of B ($(length(B)))"))

    Adata = parent(A)
    Astr = stride(A, 1)
    # `offset` is zero-based, so the first element of a view lives at `offset + 1`
    # in its parent, independent of the stride.
    IA = A.offset + 1

    Bdata = parent(B)
    Bstr1, Bstr2 = strides(B)

    # Linear parent index of the first element of the current column of `B`.
    IB_1 = B.offset + 1
    # Bounds checks are skipped: the indices follow from the views' own
    # offset/strides/size and the length check above.
    @inbounds for _ in axes(B, 2)
        IB = IB_1
        for _ in axes(B, 1)
            Adata[IA] = Bdata[IB]
            # Advance after use so that non-unit strides also start at `offset + 1`.
            IA += Astr
            IB += Bstr1
        end
        IB_1 += Bstr2
    end

    return A
end

src/tensors/indexmanipulations.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -650,7 +650,7 @@ function _add_transform_multi!(tdst, tsrc, p,
650650
buffer_src = StridedView(buffer2, (blocksize, cols), (1, blocksize), 0)
651651
for (i, struct_src) in enumerate(structs_src)
652652
subblock_src = sreshape(StridedView(tsrc.data, sz_src, struct_src...), matsize)
653-
copyto!(buffer_src[:, i], subblock_src)
653+
_copyto!(buffer_src[:, i], subblock_src)
654654
end
655655

656656
# Resummation into a second buffer using BLAS

0 commit comments

Comments
 (0)