@@ -613,32 +613,28 @@ module IteratorsMD
613613 end
614614
615615 @inline function simd_outer_range (iter:: CartesianPartition )
616- # In general, the Cartesian Partition might start and stop in the middle of the outer
617- # dimensions — thus the outer range of a CartesianPartition is itself a
618- # CartesianPartition.
616+ # In general, the CartesianPartition might start and stop in the middle of the outer
617+ # dimensions, so the outer range is itself a CartesianPartition.
619618 piter = iter. parent. parent
620619 ci = CartesianIndices (tail (piter. indices))
621620 size1 = size (piter, 1 )
622- offsetʷ = firstindex (piter) - 1
623- vindʷ = only (iter. indices) .- offsetʷ
624- offsetᵒ = firstindex (ci) - 1
625- vindᵒ = cld (first (vindʷ) , size1) : cld (last (vindʷ), size1)
621+ vindʷ = only (iter. indices) .- firstindex (piter) .+ 1
622+ d₁, r₁ = divrem (first (vindʷ), size1, RoundUp)
623+ d₂, r₂ = divrem (last (vindʷ) , size1, RoundUp)
624+ vindᵒ = (d₁: d₂) .+ firstindex (ci) .- 1
625+ outer = @inbounds view (ci, vindᵒ)
626626 # Use Generator to make inner loop branchless
627627 @inline function genouter (i:: Int , I:: CartesianIndex )
628- l, r = first (vindᵒ), last (vindᵒ)
629- skip = i == 1 ? first (vindʷ) - 1 - (l - 1 ) * size1 : 0
630- len = i == length (vindᵒ) ? last (vindʷ) - (r - 1 ) * size1 : size1
628+ skip = i == 1 ? r₁ + size1 - 1 : 0
629+ # TODO: `i == d₂ - d₁ + 1` and `i == length(outer)` are both slower in the 4d case
630+ len = i == length (vindᵒ) ? r₂ + size1 : size1
631631 skip, len - skip, I
632632 end
633- outer = @inbounds view (ci, vindᵒ .+ offsetᵒ)
634633 (genouter (i, I) for (i, I) in Iterators. enumerate (outer))
635634 end
636- @inline simd_inner_length (iter:: CartesianPartition , (skip, len, I):: Tuple{Int,Int,CartesianIndex} ) = len
637- @inline function simd_index (iter:: CartesianPartition , (skip, len, I):: Tuple{Int,Int,CartesianIndex} , n:: Int )
638- inner = first (iter. parent. parent. indices)
639- n += skip + firstindex (inner)
640- @inbounds CartesianIndex (inner[n], I)
641- end
635+ @inline simd_inner_length (iter:: CartesianPartition , (_, len, _):: Tuple{Int,Int,CartesianIndex} ) = len
636+ @propagate_inbounds simd_index (iter:: CartesianPartition , (skip, _, I):: Tuple{Int,Int,CartesianIndex} , n:: Int ) =
637+ simd_index (iter. parent. parent, I, n + skip)
642638end # IteratorsMD
643639
644640
0 commit comments