1
+
2
+
3
+
1
4
function uniquearrayrefs_csesummary (ls:: LoopSet )
2
5
uniquerefs = ArrayReferenceMeta[]
3
6
# each `Vector{Tuple{Int,Int}}` has the same name
@@ -100,21 +103,21 @@ function indices_calculated_by_pointer_offsets(ls::LoopSet, ar::ArrayReferenceMe
100
103
out
101
104
end
102
105
103
- @generated function set_first_stride (sptr:: StridedPointer{T,N,C,B,R} ) where {T,N,C,B,R}
104
- minrank = argmin (R)
105
- newC = C > 0 ? (C == minrank ? 1 : 0 ) : C
106
- newB = C > 0 ? (C == minrank ? B : 0 ) : B # TODO : confirm correctness
107
- quote
108
- $ (Expr (:meta ,:inline ))
109
- # VectorizationBase.StridedPointer{$T,1,$newC,$newB,$(R[minrank],)}($(lv(llvmptr))(sptr), (sptr.strd[$minrank],), (Zero(),))
110
- VectorizationBase. StridedPointer {$T,1,$newC,$newB,$(R[minrank],)} (VectorizationBase. cpupointer (sptr), (sptr. strd[$ minrank],), (Zero (),))
111
- end
112
- end
113
- set_first_stride (x) = x # cross fingers that this works
114
- @inline onetozeroindexgephack (sptr:: AbstractStridedPointer ) = gesp (set_first_stride (sptr), (Static {-1} (),)) # go backwords
115
- @inline onetozeroindexgephack (sptr:: AbstractStridedPointer{T,1} ) where {T} = sptr
106
+ # @generated function set_first_stride(sptr::StridedPointer{T,N,C,B,R}) where {T,N,C,B,R}
107
+ # minrank = argmin(R)
108
+ # newC = C > 0 ? (C == minrank ? 1 : 0) : C
109
+ # newB = C > 0 ? (C == minrank ? B : 0) : B #TODO : confirm correctness
110
+ # quote
111
+ # $(Expr(:meta,:inline))
112
+ # # VectorizationBase.StridedPointer{$T,1,$newC,$newB,$(R[minrank],)}($(lv(llvmptr))(sptr), (sptr.strd[$minrank],), (Zero(),))
113
+ # VectorizationBase.StridedPointer{$T,1,$newC,$newB,$(R[minrank],)}(VectorizationBase.cpupointer(sptr), (sptr.strd[$minrank],), (Zero(),))
114
+ # end
115
+ # end
116
+ # set_first_stride(x) = x # cross fingers that this works
117
+ # @inline onetozeroindexgephack(sptr::AbstractStridedPointer) = gesp(set_first_stride(sptr), (Static{-1}(),)) # go backwards
118
+ # @inline onetozeroindexgephack(sptr::AbstractStridedPointer{T,1}) where {T} = sptr
116
119
# @inline onetozeroindexgephack(sptr::StridedPointer{T,1}) where {T} = sptr
117
- @inline onetozeroindexgephack (x) = x
120
+ # @inline onetozeroindexgephack(x) = x
118
121
119
122
# # Removes parent/child relationship for all children with ref `ar`
120
123
# function freechildren!(op::Operation, ar::ArrayReferenceMeta)
@@ -586,6 +589,7 @@ function use_loop_induct_var!(
586
589
vpgesped = Expr (:call , lv (:offsetprecalc ), vpgesped, Expr (:call , Expr (:curly , :Val , offsetprecalc_descript)))
587
590
end
588
591
push! (q. args, Expr (:(= ), vptrar, vpgesped))
592
+ push! (q. args, Expr (:(= ), vptr_offset (vptrar), Expr (:call , GlobalRef (VectorizationBase, :increment_ptr ), vptrar)))
589
593
end
590
594
uliv
591
595
end
@@ -654,7 +658,7 @@ function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvec
654
658
stop = last (loop)
655
659
incr = step (loop)
656
660
if isknown (start) & isknown (stop)
657
- pointermax (ls, ar, n, sub, isvectorized, 1 + gethint (stop) - gethint (start), incr)
661
+ return pointermax (ls, ar, n, sub, isvectorized, 1 + gethint (stop) - gethint (start), incr)
658
662
end
659
663
looplensym = isone (start) ? getsym (stop) : loop. lensym
660
664
pointermax (ls, ar, n, sub, isvectorized, looplensym, incr)
@@ -740,8 +744,9 @@ function pointermax_index(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int,
740
744
index, ind
741
745
end
742
746
"""
    pointermax(ls, ar, n, sub, isvectorized, stopsym, incr)

Return the call expression
`VectorizationBase.increment_ptr(vptr(ar), vptr_offset(vptr(ar)), index)`, where `index`
is the first value returned by `pointermax_index` for the same arguments.
"""
function pointermax(
    ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool,
    stopsym, incr::MaybeKnown
)::Expr
    # Only the index expression is used here; the second returned value is ignored.
    index, _ = pointermax_index(ls, ar, n, sub, isvectorized, stopsym, incr)
    base = vptr(ar)
    increment = GlobalRef(VectorizationBase, :increment_ptr)
    return Expr(:call, increment, base, vptr_offset(base), index)
end
746
751
747
752
function defpointermax (ls:: LoopSet , ar:: ArrayReferenceMeta , n:: Int , sub:: Int , isvectorized:: Bool ):: Expr
@@ -767,58 +772,48 @@ end
767
772
"""
    append_pointer_maxes!(loopstart, ls, ar, n, submax, isvectorized, stopindicator, incr)

Push onto `loopstart.args` one assignment `maxsym(vptr(ar), sub) = ...` for every
`sub ∈ 0:submax`.

For `submax < 2` each right-hand side is built by `pointermax`. Otherwise the `submax`
entry is built once as a `VectorizationBase.increment_ptr` call using the index returned
by `pointermax_index`, and the remaining entries are `increment_ptr` calls relative to
that symbol, using `offsetindex` scaled by the matching stride.

Returns `nothing`.
"""
function append_pointer_maxes!(
    loopstart::Expr, ls::LoopSet, ar::ArrayReferenceMeta, n::Int, submax::Int,
    isvectorized::Bool, stopindicator, incr::MaybeKnown
)
    ptrsym = vptr(ar)
    stmts = loopstart.args

    # Small unroll factors: compute every maximum pointer independently.
    if submax < 2
        for sub in 0:submax
            lhs = maxsym(ptrsym, sub)
            rhs = pointermax(ls, ar, n, sub, isvectorized, stopindicator, incr)
            push!(stmts, Expr(:(=), lhs, rhs))
        end
        return nothing
    end

    # Larger unroll factors: compute the `submax` pointer once, then derive the others
    # from it.
    index, ind = pointermax_index(ls, ar, n, submax, isvectorized, stopindicator, incr)
    basesym = maxsym(ptrsym, submax)
    increment = GlobalRef(VectorizationBase, :increment_ptr)
    basecall = Expr(:call, increment, ptrsym, vptr_offset(ptrsym), index)
    push!(stmts, Expr(:(=), basesym, basecall))

    dim = length(getindicesonly(ar))
    strd = getstrides(ar)[ind]
    for sub in 0:(submax - 1)
        shift = offsetindex(dim, ind, (submax - sub) * strd, isvectorized, incr)
        derived = Expr(:call, increment, ptrsym, basesym, shift)
        push!(stmts, Expr(:(=), maxsym(ptrsym, sub), derived))
    end
    return nothing
end
800
795
"""
    append_pointer_maxes!(loopstart, ls, ar, n, submax, isvectorized)

Look up loop `n` of `ls`, derive its stop indicator and step, and forward to the
eight-argument `append_pointer_maxes!` method.

The stop indicator is `startstopΔ(loop) + 1` when both loop bounds are known; otherwise
it is `getsym(last(loop))` if the loop starts at one, and `loop.lensym` if it does not.
"""
function append_pointer_maxes!(
    loopstart::Expr, ls::LoopSet, ar::ArrayReferenceMeta, n::Int, submax::Int,
    isvectorized::Bool
)
    loop = getloop(ls, n)
    @assert loop.itersymbol == names(ls)[n]
    lo = first(loop)
    hi = last(loop)
    incr = step(loop)
    # `&` (not `&&`) is kept so both `isknown` calls are always evaluated.
    stopindicator = if isknown(lo) & isknown(hi)
        startstopΔ(loop) + 1
    elseif isone(lo)
        getsym(hi)
    else
        loop.lensym
    end
    return append_pointer_maxes!(
        loopstart, ls, ar, n, submax, isvectorized, stopindicator, incr
    )
end
812
807
813
808
"""
    maxunroll(us::UnrollSpecification, n)

Return `us.u₁` when `n` equals `us.u₁loopnum`, `us.u₂` when `n` equals `us.u₂loopnum`
(checked in that order), and `1` otherwise.
"""
function maxunroll(us::UnrollSpecification, n)
    @unpack u₁loopnum, u₂loopnum, u₁, u₂ = us
    n == u₁loopnum && return u₁
    n == u₂loopnum && return u₂
    return 1
end
823
818
824
819
@@ -830,8 +825,8 @@ function startloop(ls::LoopSet, us::UnrollSpecification, n::Int, submax = maxunr
830
825
loopstart = Expr (:block )
831
826
firstloop = n == num_loops (ls)
832
827
for ar ∈ ptrdefs
833
- ptr = vptr (ar)
834
- push! (loopstart. args, Expr (:(= ), ptr, ptr ))
828
+ ptr_offset = vptr_offset (ar)
829
+ push! (loopstart. args, Expr (:(= ), ptr_offset, ptr_offset ))
835
830
end
836
831
if iszero (termind)
837
832
loopsym = names (ls)[n]
@@ -845,22 +840,24 @@ end
845
840
"""
    offset_ptr(ar, us, loopsym, n, UF, offsetinds, loop)

Return the assignment expression
`vptr_offset(ar) = VectorizationBase.increment_ptr(vptr(ar), vptr_offset(ar), inds)`.

`inds` is a tuple expression assembled while walking `ar.loopedindex`. For position `i`,
when `offsetinds[i]` is true and the corresponding index of `ar` is `loopsym`,
`incrementloopcounter!` is called on the tuple expression with `UF * strides[i]`
(presumably appending that entry; confirm against its definition). Otherwise a
`Zero()` call is pushed.
"""
function offset_ptr(
    ar::ArrayReferenceMeta, us::UnrollSpecification, loopsym::Symbol, n::Int, UF::Int,
    offsetinds::Vector{Bool}, loop::Loop
)
    inds = getindices(ar)
    strds = getstrides(ar)
    # When the first index is the DISCONTIGUOUS marker, positions in `inds` are shifted
    # by one relative to `ar.loopedindex`.
    shift = first(inds) === DISCONTIGUOUS
    increments = Expr(:tuple)
    for i in eachindex(ar.loopedindex)
        ind = inds[i + shift]
        if offsetinds[i] && ind === loopsym
            incrementloopcounter!(increments, us, n, UF * strds[i], loop)
        else
            push!(increments.args, Expr(:call, lv(:Zero)))
        end
    end
    offsetsym = vptr_offset(ar)
    increment = GlobalRef(VectorizationBase, :increment_ptr)
    return Expr(:(=), offsetsym, Expr(:call, increment, vptr(ar), offsetsym, increments))
end
865
862
function incrementloopcounter! (q:: Expr , ls:: LoopSet , us:: UnrollSpecification , n:: Int , UF:: Int )
866
863
@unpack u₁loopnum, u₂loopnum, vloopnum, u₁, u₂ = us
@@ -880,18 +877,19 @@ function incrementloopcounter!(q::Expr, ls::LoopSet, us::UnrollSpecification, n:
880
877
nothing
881
878
end
882
879
"""
    terminatecondition(ls::LoopSet, us, n, inclmask, UF)

Build the loop-termination test expression for loop `n`.

When `ls.lssm.terminators[n]` is zero, defer to the `terminatecondition` method that
takes the loop and its iteration symbol. Otherwise compare the offset pointer of the
selected entry of `ls.lssm.incrementedptrs[n]` against one of its `maxsym` symbols:
`VectorizationBase.vlt` with `maxsym(ptr, 0)` when `inclmask` is set and loop `n` is
vectorized, `VectorizationBase.vle` with `maxsym(ptr, UF)` otherwise.
"""
function terminatecondition(
    ls::LoopSet, us::UnrollSpecification, n::Int, inclmask::Bool, UF::Int
)
    state = ls.lssm
    termind = state.terminators[n]
    if iszero(termind)
        loop = getloop(ls, n)
        return terminatecondition(loop, us, n, loop.itersymbol, inclmask, UF)
    end

    ptr = vptr(state.incrementedptrs[n][termind])
    optr = vptr_offset(ptr)
    masked = inclmask && isvectorized(us, n)
    comparison = masked ? :vlt : :vle
    bound = masked ? 0 : UF
    return Expr(
        :call, GlobalRef(VectorizationBase, comparison), optr, maxsym(ptr, bound), ptr
    )
end
0 commit comments