Skip to content

Commit 382f5ad

Browse files
committed
Performance Optimization
1. Reuse `CartesianIndices`'s `simd_index`. 2. Make the 4d `@simd` case faster (the reason for the speedup is not yet understood).
1 parent 86ee774 commit 382f5ad

File tree

1 file changed

+13
-17
lines changed

1 file changed

+13
-17
lines changed

base/multidimensional.jl

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -613,32 +613,28 @@ module IteratorsMD
613613
end
614614

615615
@inline function simd_outer_range(iter::CartesianPartition)
616-
# In general, the Cartesian Partition might start and stop in the middle of the outer
617-
# dimensions — thus the outer range of a CartesianPartition is itself a
618-
# CartesianPartition.
616+
# In general, the CartesianPartition might start and stop in the middle of the outer
617+
# dimensions, so the outer range is itself a CartesianPartition.
619618
piter = iter.parent.parent
620619
ci = CartesianIndices(tail(piter.indices))
621620
size1 = size(piter, 1)
622-
offsetʷ = firstindex(piter) - 1
623-
vindʷ = only(iter.indices) .- offsetʷ
624-
offsetᵒ = firstindex(ci) - 1
625-
vindᵒ = cld(first(vindʷ) , size1) : cld(last(vindʷ), size1)
621+
vindʷ = only(iter.indices) .- firstindex(piter) .+ 1
622+
d₁, r₁ = divrem(first(vindʷ), size1, RoundUp)
623+
d₂, r₂ = divrem(last(vindʷ) , size1, RoundUp)
624+
vindᵒ = (d₁:d₂) .+ firstindex(ci) .- 1
625+
outer = @inbounds view(ci, vindᵒ)
626626
# Use Generator to make inner loop branchless
627627
@inline function genouter(i::Int, I::CartesianIndex)
628-
l, r = first(vindᵒ), last(vindᵒ)
629-
skip = i == 1 ? first(vindʷ) - 1 - (l - 1) * size1 : 0
630-
len = i == length(vindᵒ) ? last(vindʷ) - (r - 1) * size1 : size1
628+
skip = i == 1 ? r₁ + size1 - 1 : 0
629+
# TODO: comparing `i` with `d₂ - d₁ + 1` or `length(outer)` instead of `length(vindᵒ)` is slower for the 4d case; find out why.
630+
len = i == length(vindᵒ) ? r₂ + size1 : size1
631631
skip, len - skip, I
632632
end
633-
outer = @inbounds view(ci, vindᵒ .+ offsetᵒ)
634633
(genouter(i, I) for (i, I) in Iterators.enumerate(outer))
635634
end
636-
@inline simd_inner_length(iter::CartesianPartition, (skip, len, I)::Tuple{Int,Int,CartesianIndex}) = len
637-
@inline function simd_index(iter::CartesianPartition, (skip, len, I)::Tuple{Int,Int,CartesianIndex}, n::Int)
638-
inner = first(iter.parent.parent.indices)
639-
n += skip + firstindex(inner)
640-
@inbounds CartesianIndex(inner[n], I)
641-
end
635+
@inline simd_inner_length(iter::CartesianPartition, (_, len, _)::Tuple{Int,Int,CartesianIndex}) = len
636+
@propagate_inbounds simd_index(iter::CartesianPartition, (skip, _, I)::Tuple{Int,Int,CartesianIndex}, n::Int) =
637+
simd_index(iter.parent.parent, I, n + skip)
642638
end # IteratorsMD
643639

644640

0 commit comments

Comments
 (0)