@@ -591,7 +591,7 @@ module IteratorsMD
591
591
CartesianIndices (intersect .(a. indices, b. indices))
592
592
593
593
# Views of reshaped CartesianIndices are used for partitions — ensure these are fast
594
- const CartesianPartition{T<: CartesianIndex , P<: CartesianIndices , R<: ReshapedArray{T,1,P} } = SubArray{T,1 ,R,Tuple{UnitRange {Int}},false }
594
+ const CartesianPartition{T<: CartesianIndex , P<: CartesianIndices , R<: ReshapedArray{T,1,P} } = SubArray{T,1 ,R,<: Tuple{AbstractUnitRange {Int}} ,false }
595
595
eltype (:: Type{PartitionIterator{T}} ) where {T<: ReshapedArrayLF } = SubArray{eltype (T), 1 , T, Tuple{UnitRange{Int}}, true }
596
596
eltype (:: Type{PartitionIterator{T}} ) where {T<: ReshapedArray } = SubArray{eltype (T), 1 , T, Tuple{UnitRange{Int}}, false }
597
597
Iterators. IteratorEltype (:: Type{<:PartitionIterator{T}} ) where {T<: ReshapedArray } = Iterators. IteratorEltype (T)
@@ -600,7 +600,6 @@ module IteratorsMD
600
600
eltype (:: Type{PartitionIterator{T}} ) where {T<: Union{UnitRange, StepRange, StepRangeLen, LinRange} } = T
601
601
Iterators. IteratorEltype (:: Type{<:PartitionIterator{T}} ) where {T<: Union{OneTo, UnitRange, StepRange, StepRangeLen, LinRange} } = Iterators. IteratorEltype (T)
602
602
603
-
604
603
@inline function iterate (iter:: CartesianPartition )
605
604
isempty (iter) && return nothing
606
605
f = first (iter)
@@ -616,33 +615,45 @@ module IteratorsMD
616
615
# In general, the Cartesian Partition might start and stop in the middle of the outer
617
616
# dimensions — thus the outer range of a CartesianPartition is itself a
618
617
# CartesianPartition.
619
- t = tail (iter. parent. parent. indices)
620
- ci = CartesianIndices (t)
621
- li = LinearIndices (t)
622
- return @inbounds view (ci, li[tail (iter[1 ]. I)... ]: li[tail (iter[end ]. I)... ])
618
+ mi = iter. parent. mi
619
+ ci = iter. parent. parent
620
+ ax, ax1 = axes (ci), Base. axes1 (ci)
621
+ subs = Base. ind2sub_rs (ax, mi, first (iter. indices[1 ]))
622
+ vl, fl = Base. _sub2ind (tail (ax), tail (subs)... ), subs[1 ]
623
+ vr, fr = divrem (last (iter. indices[1 ]) - 1 , mi[end ]) .+ (1 , first (ax1))
624
+ oci = CartesianIndices (tail (ci. indices))
625
+ # A fake CartesianPartition to reuse the outer iterate fallback
626
+ outer = @inbounds view (ReshapedArray (oci, (length (oci),), mi), vl: vr)
627
+ init = @inbounds dec (oci[tail (subs)... ]. I, oci. indices) # real init state
628
+ # Use Generator to make inner loop branchless
629
+ @inline function skip_len_I (i:: Int , I:: CartesianIndex )
630
+ l = i == 1 ? fl : first (ax1)
631
+ r = i == length (outer) ? fr : last (ax1)
632
+ l - first (ax1), r - l + 1 , I
633
+ end
634
+ (skip_len_I (i, I) for (i, I) in Iterators. enumerate (Iterators. rest (outer, (init, 0 ))))
623
635
end
624
- function simd_outer_range (iter:: CartesianPartition{CartesianIndex{2}} )
636
+ @inline function simd_outer_range (iter:: CartesianPartition{CartesianIndex{2}} )
625
637
# But for two-dimensional Partitions the above is just a simple one-dimensional range
626
638
# over the second dimension; we don't need to worry about non-rectangular staggers in
627
639
# higher dimensions.
628
- return @inbounds CartesianIndices ((iter[1 ][2 ]: iter[end ][2 ],))
629
- end
630
- @inline function simd_inner_length (iter:: CartesianPartition , I:: CartesianIndex )
631
- inner = iter. parent. parent. indices[1 ]
632
- @inbounds fi = iter[1 ]. I
633
- @inbounds li = iter[end ]. I
634
- inner_start = I. I == tail (fi) ? fi[1 ] : first (inner)
635
- inner_end = I. I == tail (li) ? li[1 ] : last (inner)
636
- return inner_end - inner_start + 1
637
- end
638
- @inline function simd_index (iter:: CartesianPartition , Ilast:: CartesianIndex , I1:: Int )
639
- # I1 is the 0-based distance from the first dimension's offest
640
- offset = first (iter. parent. parent. indices[1 ]) # (this is 1 for 1-based arrays)
641
- # In the first column we need to also add in the iter's starting point (branchlessly)
642
- f = @inbounds iter[1 ]
643
- startoffset = (Ilast. I == tail (f. I))* (f[1 ] - 1 )
644
- CartesianIndex ((I1 + offset + startoffset, Ilast. I... ))
640
+ mi = iter. parent. mi
641
+ ci = iter. parent. parent
642
+ ax, ax1 = axes (ci), Base. axes1 (ci)
643
+ fl, vl = Base. ind2sub_rs (ax, mi, first (iter. indices[1 ]))
644
+ fr, vr = Base. ind2sub_rs (ax, mi, last (iter. indices[1 ]))
645
+ outer = @inbounds CartesianIndices ((ci. indices[2 ][vl: vr],))
646
+ # Use Generator to make inner loop branchless
647
+ @inline function skip_len_I (I:: CartesianIndex{1} )
648
+ l = I == first (outer) ? fl : first (ax1)
649
+ r = I == last (outer) ? fr : last (ax1)
650
+ l - first (ax1), r - l + 1 , I
651
+ end
652
+ (skip_len_I (I) for I in outer)
645
653
end
654
+ @inline simd_inner_length (iter:: CartesianPartition , (_, len, _):: Tuple{Int,Int,CartesianIndex} ) = len
655
+ @propagate_inbounds simd_index (iter:: CartesianPartition , (skip, _, I):: Tuple{Int,Int,CartesianIndex} , n:: Int ) =
656
+ simd_index (iter. parent. parent, I, n + skip)
646
657
end # IteratorsMD
647
658
648
659
0 commit comments