@@ -612,23 +612,24 @@ module IteratorsMD
612612 return I, (I, n+ 1 )
613613 end
614614
# Split the linear index `i` (an index into `piter`) into two parts:
#   1. the first subscript produced by `Base._to_subscript_indices(piter, i)`, and
#   2. the linear index into `oiter` obtained by passing the remaining subscripts to
#      `Base._to_linear_index`.
# `simd_outer_range` calls this with `oiter` taken from `split(piter, Val(1))`, so the
# two results locate `i` as (position along the first axis, position in the outer range).
@inline function _splitlinear(piter::CartesianIndices, oiter::CartesianIndices, i::Int)
    subs = Base._to_subscript_indices(piter, i)
    return subs[1], Base._to_linear_index(oiter, tail(subs)...)
end
# Build the outer iteration used when looping over a `CartesianPartition`.
#
# Returns a generator over the outer range that yields, for each outer index `I`, a
# tuple `(skip, len, I)`:
#   * `skip` is the offset of the first visited position from `firstindex(ax1)`,
#   * `len` is the number of positions visited along the first axis,
#   * `I` is the outer `CartesianIndex` itself.
# Only the first and last outer elements may cover a partial span of the first axis
# (bounded by `fl` and `fr`); every other element covers
# `firstindex(ax1):lastindex(ax1)`.
@inline function simd_outer_range(iter::CartesianPartition)
    # In general, the CartesianPartition might start and stop in the middle of the outer
    # dimensions, thus the outer range itself is a CartesianPartition.
    piter = iter.parent.parent
    ax1, oiter = split(piter, Val(1))
    vindʷ = only(iter.indices)
    # (first-axis subscript, linear index into `oiter`) for both ends of the partition.
    fl, vl = _splitlinear(piter, oiter, first(vindʷ))
    fr, vr = _splitlinear(piter, oiter, last(vindʷ))
    outer = @inbounds view(oiter, vl:vr)
    # Use Generator to make inner loop branchless
    @inline function genouter(i::Int, I::CartesianIndex)
        l = i == 1 ? fl : firstindex(ax1)
        r = i == length(outer) ? fr : lastindex(ax1)
        return l - firstindex(ax1), r - l + 1, I
    end
    return (genouter(i, I) for (i, I) in Iterators.enumerate(outer))
end
0 commit comments